HBASE-3290 Max Compaction Size

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1041278 13f79535-47bb-0310-9956-ffa450edef68
Michael Stack 2010-12-02 04:40:55 +00:00
parent 953068adbb
commit fc59f7d77c
4 changed files with 422 additions and 120 deletions

CHANGES.txt

@@ -15,6 +15,7 @@ Release 0.91.0 - Unreleased
   IMPROVEMENTS
   HBASE-2001 Coprocessors: Colocate user code with regions (Mingjie Lai via
              Andrew Purtell)
+  HBASE-3290 Max Compaction Size (Nicolas Spiegelberg via Stack)
   NEW FEATURES
   HBASE-3287 Add option to cache blocks on hfile write and evict blocks on

Store.java (org.apache.hadoop.hbase.regionserver)

@@ -53,6 +53,8 @@ import org.apache.hadoop.hbase.util.ClassSize;
 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
 import org.apache.hadoop.util.StringUtils;
+import com.google.common.base.Predicate;
+import com.google.common.collect.Collections2;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.Iterables;
@@ -91,8 +93,10 @@ public class Store implements HeapSize {
   // ttl in milliseconds.
   protected long ttl;
   private long majorCompactionTime;
+  private final int minFilesToCompact;
   private final int maxFilesToCompact;
   private final long minCompactSize;
+  private final long maxCompactSize;
   // compactRatio: double on purpose! Float.MAX < Long.MAX < Double.MAX
   // With float, java will downcast your long to float for comparisons (bad)
   private double compactRatio;
@@ -119,7 +123,6 @@ public class Store implements HeapSize {
     new CopyOnWriteArraySet<ChangedReadersObserver>();
   private final Object compactLock = new Object();
-  private final int compactionThreshold;
   private final int blocksize;
   private final boolean blockcache;
   /** Compression algorithm for flush files and minor compaction */
@@ -177,10 +180,10 @@ public class Store implements HeapSize {
     this.memstore = new MemStore(this.comparator);
     this.storeNameStr = Bytes.toString(this.family.getName());
-    // By default, we compact if an HStore has more than
-    // MIN_COMMITS_FOR_COMPACTION map files
-    this.compactionThreshold = Math.max(2,
-      conf.getInt("hbase.hstore.compactionThreshold", 3));
+    // By default, compact if storefile.count >= minFilesToCompact
+    this.minFilesToCompact = Math.max(2,
+      conf.getInt("hbase.hstore.compaction.min",
+        /*old name*/ conf.getInt("hbase.hstore.compactionThreshold", 3)));
     // Check if this is in-memory store
     this.inMemory = family.isInMemory();
@@ -198,7 +201,10 @@ public class Store implements HeapSize {
     this.majorCompactionTime = getNextMajorCompactTime();
     this.maxFilesToCompact = conf.getInt("hbase.hstore.compaction.max", 10);
-    this.minCompactSize = this.region.memstoreFlushSize * 3 / 2; // +50% pad
+    this.minCompactSize = conf.getLong("hbase.hstore.compaction.min.size",
+      this.region.memstoreFlushSize);
+    this.maxCompactSize
+      = conf.getLong("hbase.hstore.compaction.max.size", 0);
     this.compactRatio = conf.getFloat("hbase.hstore.compaction.ratio", 1.2F);
     if (Store.closeCheckInterval == 0) {
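Editor's note (an illustration, not part of the patch): the constructor hunk above is where all five compaction knobs are read. A minimal sketch of tuning them from client code follows, assuming the usual HBaseConfiguration.create() factory; the property names are the ones read above, while the values are purely illustrative.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;

public class CompactionTuning {
  // Builds a Configuration with the compaction knobs set explicitly.
  // Property names come from the hunk above; values are examples only.
  public static Configuration tunedConf() {
    Configuration conf = HBaseConfiguration.create();
    conf.setInt("hbase.hstore.compaction.min", 3);        // files needed before a minor compaction
    conf.setInt("hbase.hstore.compaction.max", 10);       // cap on files per compaction (avoids OOM)
    conf.setLong("hbase.hstore.compaction.min.size", 64 * 1024 * 1024L);   // always-include threshold
    conf.setLong("hbase.hstore.compaction.max.size", 1024 * 1024 * 1024L); // never-include threshold (0 = disabled)
    conf.setFloat("hbase.hstore.compaction.ratio", 1.2F); // include file when <= sum(smaller files) * ratio
    return conf;
  }
}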
@@ -552,7 +558,7 @@ public class Store implements HeapSize {
       // Tell listeners of the change in readers.
       notifyChangedReadersObservers();
-      return this.storefiles.size() >= this.compactionThreshold;
+      return this.storefiles.size() >= this.minFilesToCompact;
     } finally {
       this.lock.writeLock().unlock();
     }
@@ -609,129 +615,55 @@ public class Store implements HeapSize {
    */
   StoreSize compact(final boolean forceMajor) throws IOException {
     boolean forceSplit = this.region.shouldSplit(false);
-    boolean majorcompaction = forceMajor;
     synchronized (compactLock) {
-      this.lastCompactSize = 0;
-      // filesToCompact are sorted oldest to newest.
-      List<StoreFile> filesToCompact = this.storefiles;
+      this.lastCompactSize = 0; // reset first in case compaction is aborted
+      // sanity checks
+      for (StoreFile sf : this.storefiles) {
+        if (sf.getPath() == null || sf.getReader() == null) {
+          boolean np = sf.getPath() == null;
+          LOG.debug("StoreFile " + sf + " has null " + (np ? "Path":"Reader"));
+          return null;
+        }
+      }
+      // if the user wants to force a split, skip compaction unless necessary
+      boolean references = hasReferences(this.storefiles);
+      if (forceSplit && !forceMajor && !references) {
+        return checkSplit(forceSplit);
+      }
+      Collection<StoreFile> filesToCompact
+        = compactSelection(this.storefiles, forceMajor);
+      // empty == do not compact
       if (filesToCompact.isEmpty()) {
-        LOG.debug(this.storeNameStr + ": no store files to compact");
-        return null;
-      }
-      // Check to see if we need to do a major compaction on this region.
-      // If so, change doMajorCompaction to true to skip the incremental
-      // compacting below. Only check if doMajorCompaction is not true.
-      if (!majorcompaction) {
-        majorcompaction = isMajorCompaction(filesToCompact);
-      }
-      boolean references = hasReferences(filesToCompact);
-      if (!majorcompaction && !references &&
-          (forceSplit || (filesToCompact.size() < compactionThreshold))) {
+        // but do see if we need to split before returning
         return checkSplit(forceSplit);
       }
-      /* get store file sizes for incremental compacting selection.
-       * normal skew:
-       *
-       *         older ----> newer
-       *     _
-       *    | |   _
-       *    | |  | |   _
-       *  --|-|- |-|- |-|---_-------_-------  minCompactSize
-       *    | |  | |  | |  | |  _  | |
-       *    | |  | |  | |  | | | | | |
-       *    | |  | |  | |  | | | | | |
-       */
-      int countOfFiles = filesToCompact.size();
-      long [] fileSizes = new long[countOfFiles];
-      long [] sumSize = new long[countOfFiles];
-      for (int i = countOfFiles-1; i >= 0; --i) {
-        StoreFile file = filesToCompact.get(i);
-        Path path = file.getPath();
-        if (path == null) {
-          LOG.error("Path is null for " + file);
-          return null;
-        }
-        StoreFile.Reader r = file.getReader();
-        if (r == null) {
-          LOG.error("StoreFile " + file + " has a null Reader");
-          return null;
-        }
-        fileSizes[i] = file.getReader().length();
-        // calculate the sum of fileSizes[i,i+maxFilesToCompact-1) for algo
-        int tooFar = i + this.maxFilesToCompact - 1;
-        sumSize[i] = fileSizes[i]
-          + ((i+1    < countOfFiles) ? sumSize[i+1]      : 0)
-          - ((tooFar < countOfFiles) ? fileSizes[tooFar] : 0);
-      }
+      // sum size of all files included in compaction
       long totalSize = 0;
-      if (!majorcompaction && !references) {
-        // we're doing a minor compaction, let's see what files are applicable
-        int start = 0;
-        double r = this.compactRatio;
-        /* Start at the oldest file and stop when you find the first file that
-         * meets compaction criteria:
-         *   (1) a recently-flushed, small file (i.e. <= minCompactSize)
-         *      OR
-         *   (2) within the compactRatio of sum(newer_files)
-         * Given normal skew, any newer files will also meet this criteria
-         *
-         * Additional Note:
-         * If fileSizes.size() >> maxFilesToCompact, we will recurse on
-         * compact(). Consider the oldest files first to avoid a
-         * situation where we always compact [end-threshold,end). Then, the
-         * last file becomes an aggregate of the previous compactions.
-         */
-        while(countOfFiles - start >= this.compactionThreshold &&
-              fileSizes[start] >
-                Math.max(minCompactSize, (long)(sumSize[start+1] * r))) {
-          ++start;
-        }
-        int end = Math.min(countOfFiles, start + this.maxFilesToCompact);
-        totalSize = fileSizes[start]
-          + ((start+1 < countOfFiles) ? sumSize[start+1] : 0);
-        // if we don't have enough files to compact, just wait
-        if (end - start < this.compactionThreshold) {
-          if (LOG.isDebugEnabled()) {
-            LOG.debug("Skipped compaction of " + this.storeNameStr
-              + " because only " + (end - start) + " file(s) of size "
-              + StringUtils.humanReadableInt(totalSize)
-              + " meet compaction criteria.");
-          }
-          return checkSplit(forceSplit);
-        }
-        if (0 == start && end == countOfFiles) {
-          // we decided all the files were candidates! major compact
-          majorcompaction = true;
-        } else {
-          filesToCompact = new ArrayList<StoreFile>(filesToCompact.subList(start,
-            end));
-        }
-      } else {
-        // all files included in this compaction
-        for (long i : fileSizes) {
-          totalSize += i;
-        }
+      for (StoreFile sf : filesToCompact) {
+        totalSize += sf.getReader().length();
       }
       this.lastCompactSize = totalSize;
+      // major compaction iff all StoreFiles are included
+      boolean majorcompaction
+        = (filesToCompact.size() == this.storefiles.size());
       // Max-sequenceID is the last key in the files we're compacting
       long maxId = StoreFile.getMaxSequenceIdInList(filesToCompact);
       // Ready to go. Have list of files to compact.
       LOG.info("Started compaction of " + filesToCompact.size() + " file(s) in cf=" +
           this.storeNameStr +
-        (references? ", hasReferences=true,": " ") + " into " +
+        (hasReferences(filesToCompact)? ", hasReferences=true,": " ") + " into " +
           region.getTmpDir() + ", seqid=" + maxId +
           ", totalSize=" + StringUtils.humanReadableInt(totalSize));
-      StoreFile.Writer writer = compact(filesToCompact, majorcompaction, maxId);
+      StoreFile.Writer writer
+        = compactStore(filesToCompact, majorcompaction, maxId);
       // Move the compaction into place.
       StoreFile sf = completeCompaction(filesToCompact, writer);
       if (LOG.isInfoEnabled()) {
@@ -761,7 +693,8 @@ public class Store implements HeapSize {
     boolean majorcompaction = (N == count);
     // Ready to go. Have list of files to compact.
-    StoreFile.Writer writer = compact(filesToCompact, majorcompaction, maxId);
+    StoreFile.Writer writer
+      = compactStore(filesToCompact, majorcompaction, maxId);
     // Move the compaction into place.
     StoreFile sf = completeCompaction(filesToCompact, writer);
   }
@@ -873,7 +806,149 @@ public class Store implements HeapSize {
   }
   /**
-   * Do a minor/major compaction. Uses the scan infrastructure to make it easy.
+   * Algorithm to choose which files to compact
+   *
+   * Configuration knobs:
+   *  "hbase.hstore.compaction.ratio"
+   *    normal case: minor compact when file <= sum(smaller_files) * ratio
+   *  "hbase.hstore.compaction.min.size"
+   *    unconditionally compact individual files below this size
+   *  "hbase.hstore.compaction.max.size"
+   *    never compact individual files above this size (unless splitting)
+   *  "hbase.hstore.compaction.min"
+   *    min files needed to minor compact
+   *  "hbase.hstore.compaction.max"
+   *    max files to compact at once (avoids OOM)
+   *
+   * @param candidates candidate files, ordered from oldest to newest
+   * @param majorcompaction whether to force a major compaction
+   * @return subset copy of candidate list that meets compaction criteria
+   * @throws IOException
+   */
+  List<StoreFile> compactSelection(List<StoreFile> candidates,
+      boolean forcemajor) throws IOException {
+    /* normal skew:
+     *
+     *         older ----> newer
+     *     _
+     *    | |   _
+     *    | |  | |   _
+     *  --|-|- |-|- |-|---_-------_-------  minCompactSize
+     *    | |  | |  | |  | |  _  | |
+     *    | |  | |  | |  | | | | | |
+     *    | |  | |  | |  | | | | | |
+     */
+    List<StoreFile> filesToCompact = new ArrayList<StoreFile>(candidates);
+    // Do not compact files above a configurable max filesize unless they are
+    // references. We MUST compact these
+    if (this.maxCompactSize > 0) {
+      final long msize = this.maxCompactSize;
+      filesToCompact.removeAll(Collections2.filter(filesToCompact,
+        new Predicate<StoreFile>() {
+          public boolean apply(StoreFile sf) {
+            // NOTE: keep all references. we must compact them
+            return sf.getReader().length() > msize && !sf.isReference();
+          }
+        }));
+    }
+    // major compact on user action or age (caveat: we have too many files)
+    boolean majorcompaction = forcemajor ||
+      (isMajorCompaction(filesToCompact) &&
+       filesToCompact.size() > this.maxFilesToCompact);
+    if (filesToCompact.isEmpty()) {
+      LOG.debug(this.storeNameStr + ": no store files to compact");
+      return filesToCompact;
+    }
+    if (!majorcompaction && !hasReferences(filesToCompact)) {
+      // we're doing a minor compaction, let's see what files are applicable
+      int start = 0;
+      double r = this.compactRatio;
+      // Sort files by size to correct when normal skew is altered by bulk load.
+      //
+      // So, technically, order is important for optimizations like the TimeStamp
+      // filter. However, realistically this isn't a problem because our normal
+      // skew always decreases in filesize over time. The only place where our
+      // skew doesn't decrease is for files that have been recently flushed.
+      // However, all those will be unconditionally compacted because they will
+      // be lower than "hbase.hstore.compaction.min.size".
+      //
+      // The sorting is to handle an interesting issue that popped up for us
+      // during migration: we're bulk loading StoreFiles of extremely variable
+      // size (are we migrating 1k users or 10M?) and they will all appear at
+      // the end of the StoreFile list. How do we determine when it is
+      // efficient to compact them? The easiest option was to sort the compact
+      // list and handle bulk files by relative size instead of making some
+      // custom compaction selection algorithm just for bulk inclusion. It
+      // seems like any other companies that will incrementally migrate data
+      // into HBase would hit the same issue. Nicolas.
+      //
+      Collections.sort(filesToCompact, StoreFile.Comparators.FILE_SIZE);
+      // get store file sizes for incremental compacting selection.
+      int countOfFiles = filesToCompact.size();
+      long [] fileSizes = new long[countOfFiles];
+      long [] sumSize = new long[countOfFiles];
+      for (int i = countOfFiles-1; i >= 0; --i) {
+        StoreFile file = filesToCompact.get(i);
+        fileSizes[i] = file.getReader().length();
+        // calculate the sum of fileSizes[i,i+maxFilesToCompact-1) for algo
+        int tooFar = i + this.maxFilesToCompact - 1;
+        sumSize[i] = fileSizes[i]
+          + ((i+1    < countOfFiles) ? sumSize[i+1]      : 0)
+          - ((tooFar < countOfFiles) ? fileSizes[tooFar] : 0);
+      }
+      /* Start at the oldest file and stop when you find the first file that
+       * meets compaction criteria:
+       *   (1) a recently-flushed, small file (i.e. <= minCompactSize)
+       *      OR
+       *   (2) within the compactRatio of sum(newer_files)
+       * Given normal skew, any newer files will also meet this criteria
+       *
+       * Additional Note:
+       * If fileSizes.size() >> maxFilesToCompact, we will recurse on
+       * compact(). Consider the oldest files first to avoid a
+       * situation where we always compact [end-threshold,end). Then, the
+       * last file becomes an aggregate of the previous compactions.
+       */
+      while(countOfFiles - start >= this.minFilesToCompact &&
+            fileSizes[start] >
+              Math.max(minCompactSize, (long)(sumSize[start+1] * r))) {
+        ++start;
+      }
+      int end = Math.min(countOfFiles, start + this.maxFilesToCompact);
+      long totalSize = fileSizes[start]
+        + ((start+1 < countOfFiles) ? sumSize[start+1] : 0);
+      filesToCompact = filesToCompact.subList(start, end);
+      // if we don't have enough files to compact, just wait
+      if (filesToCompact.size() < this.minFilesToCompact) {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Skipped compaction of " + this.storeNameStr
+            + ". Only " + (end - start) + " file(s) of size "
+            + StringUtils.humanReadableInt(totalSize)
+            + " have met compaction criteria.");
+        }
+        return Collections.emptyList();
+      }
+    } else {
+      // all files included in this compaction, up to max
+      if (filesToCompact.size() > this.maxFilesToCompact) {
+        int pastMax = filesToCompact.size() - this.maxFilesToCompact;
+        filesToCompact.subList(0, pastMax).clear();
+      }
+    }
+    return filesToCompact;
+  }
+  /**
+   * Do a minor/major compaction on an explicit set of storefiles in a Store.
+   * Uses the scan infrastructure to make it easy.
    *
    * @param filesToCompact which files to compact
    * @param majorCompaction true to major compact (prune all deletes, max versions, etc)
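Editor's note (an illustration, not part of the patch): to make the ratio rule in compactSelection() concrete, the following self-contained sketch replays the selection loop above on the sizes used by TestCompactSelection's default case (ratio = 1.0, minCompactSize = 10, minFilesToCompact = 3, maxFilesToCompact = 5). It prints [23, 12, 12]: 100 and 50 are each larger than the sum of the smaller files times the ratio, so they are skipped, while 23 <= 24 stops the scan.

import java.util.Arrays;

public class CompactSelectionDemo {
  public static void main(String[] args) {
    // Candidate sizes ordered largest (oldest) to smallest, as compactSelection() sorts them.
    long[] fileSizes = {100, 50, 23, 12, 12};
    double ratio = 1.0;            // hbase.hstore.compaction.ratio (test value)
    long minCompactSize = 10;      // hbase.hstore.compaction.min.size (test value)
    int minFilesToCompact = 3;     // hbase.hstore.compaction.min (test value)
    int maxFilesToCompact = 5;     // hbase.hstore.compaction.max (test value)

    int count = fileSizes.length;
    long[] sumSize = new long[count];
    for (int i = count - 1; i >= 0; --i) {
      // sum of fileSizes[i, i + maxFilesToCompact - 1), as in the patch
      int tooFar = i + maxFilesToCompact - 1;
      sumSize[i] = fileSizes[i]
          + ((i + 1 < count) ? sumSize[i + 1] : 0)
          - ((tooFar < count) ? fileSizes[tooFar] : 0);
    }

    int start = 0;
    // skip 100 (> 97) and 50 (> 47); stop at 23 (<= 24)
    while (count - start >= minFilesToCompact
        && fileSizes[start] > Math.max(minCompactSize,
               (long) (sumSize[start + 1] * ratio))) {
      ++start;
    }
    int end = Math.min(count, start + maxFilesToCompact);
    // prints [23, 12, 12], the subset the ratio rule selects
    System.out.println(Arrays.toString(Arrays.copyOfRange(fileSizes, start, end)));
  }
}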
@@ -882,7 +957,7 @@ public class Store implements HeapSize {
   * nothing made it through the compaction.
   * @throws IOException
   */
-  private StoreFile.Writer compact(final List<StoreFile> filesToCompact,
+  private StoreFile.Writer compactStore(final Collection<StoreFile> filesToCompact,
                                final boolean majorCompaction, final long maxId)
       throws IOException {
     // calculate maximum key count after compaction (for blooms)
@@ -987,7 +1062,7 @@ public class Store implements HeapSize {
   * @return StoreFile created. May be null.
   * @throws IOException
   */
-  private StoreFile completeCompaction(final List<StoreFile> compactedFiles,
+  private StoreFile completeCompaction(final Collection<StoreFile> compactedFiles,
                                        final StoreFile.Writer compactedFile)
       throws IOException {
     // 1. Moving the new files into place -- if there is a new file (may not
@@ -1521,15 +1596,15 @@ public class Store implements HeapSize {
   /**
    * See if there's too much store files in this store
    * @return true if number of store files is greater than
-   *         the number defined in compactionThreshold
+   *         the number defined in minFilesToCompact
    */
   public boolean hasTooManyStoreFiles() {
-    return this.storefiles.size() > this.compactionThreshold;
+    return this.storefiles.size() > this.minFilesToCompact;
   }
   public static final long FIXED_OVERHEAD = ClassSize.align(
       ClassSize.OBJECT + (15 * ClassSize.REFERENCE) +
-      (6 * Bytes.SIZEOF_LONG) + (1 * Bytes.SIZEOF_DOUBLE) +
+      (7 * Bytes.SIZEOF_LONG) + (1 * Bytes.SIZEOF_DOUBLE) +
       (4 * Bytes.SIZEOF_INT) + (Bytes.SIZEOF_BOOLEAN * 2));
   public static final long DEEP_OVERHEAD = ClassSize.align(FIXED_OVERHEAD +

StoreFile.java (org.apache.hadoop.hbase.regionserver)

@@ -26,6 +26,7 @@ import java.lang.management.MemoryUsage;
 import java.nio.ByteBuffer;
 import java.text.NumberFormat;
 import java.util.Arrays;
+import java.util.Collection;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.List;
@@ -303,7 +304,7 @@ public class StoreFile {
   * @return 0 if no non-bulk-load files are provided or, this is Store that
   *         does not yet have any store files.
   */
-  public static long getMaxSequenceIdInList(List<StoreFile> sfs) {
+  public static long getMaxSequenceIdInList(Collection<StoreFile> sfs) {
     long max = 0;
     for (StoreFile sf : sfs) {
       if (!sf.isBulkLoadResult()) {
@@ -909,6 +910,13 @@ public class StoreFile {
       bloomFilterType = BloomType.NONE;
     }
+    /**
+     * ONLY USE DEFAULT CONSTRUCTOR FOR UNIT TESTS
+     */
+    Reader() {
+      this.reader = null;
+    }
     public RawComparator<byte []> getComparator() {
       return reader.getComparator();
     }
@@ -1132,5 +1140,15 @@ public class StoreFile {
       }
     }
+    /**
+     * FILE_SIZE = descending sort StoreFiles (largest --> smallest in size)
+     */
+    static final Comparator<StoreFile> FILE_SIZE =
+      Ordering.natural().reverse().onResultOf(new Function<StoreFile, Long>() {
+        @Override
+        public Long apply(StoreFile sf) {
+          return sf.getReader().length();
+        }
+      });
   }
 }
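Editor's note (an illustration, not part of the patch): the comparator above is what lets Store.compactSelection() re-order candidates largest-first before applying the ratio rule. A hypothetical usage sketch, assuming the field is exposed as StoreFile.Comparators.FILE_SIZE (as the Store.java hunk references) and that the caller sits in the same org.apache.hadoop.hbase.regionserver package, since the field is package-private:

package org.apache.hadoop.hbase.regionserver;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

// Hypothetical helper: orders compaction candidates largest-first (oldest-first
// under the "normal skew" assumption) using the comparator added above.
class CandidateOrdering {
  static List<StoreFile> bySizeDescending(List<StoreFile> candidates) {
    List<StoreFile> sorted = new ArrayList<StoreFile>(candidates);
    Collections.sort(sorted, StoreFile.Comparators.FILE_SIZE);
    return sorted;
  }
}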

TestCompactSelection.java (org.apache.hadoop.hbase.regionserver, new file)

@@ -0,0 +1,208 @@
/**
 * Copyright 2010 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import junit.framework.TestCase;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.regionserver.StoreFile.Reader;
import org.apache.hadoop.hbase.regionserver.wal.HLog;
import org.apache.hadoop.hbase.regionserver.wal.TestWALReplay;
import org.apache.hadoop.hbase.util.Bytes;

import com.google.common.collect.Lists;

public class TestCompactSelection extends TestCase {
  private final static Log LOG = LogFactory.getLog(TestCompactSelection.class);
  private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();

  private Configuration conf;
  private Store store;
  private static final String DIR
    = HBaseTestingUtility.getTestDir() + "/TestCompactSelection/";

  private static final int minFiles = 3;
  private static final int maxFiles = 5;

  private static final long minSize = 10;
  private static final long maxSize = 1000;

  @Override
  public void setUp() throws Exception {
    // setup config values necessary for store
    this.conf = TEST_UTIL.getConfiguration();
    this.conf.setLong(HConstants.MAJOR_COMPACTION_PERIOD, 0);
    this.conf.setInt("hbase.hstore.compaction.min", minFiles);
    this.conf.setInt("hbase.hstore.compaction.max", maxFiles);
    this.conf.setLong("hbase.hregion.memstore.flush.size", minSize);
    this.conf.setLong("hbase.hstore.compaction.max.size", maxSize);
    this.conf.setFloat("hbase.hstore.compaction.ratio", 1.0F);

    //Setting up a Store
    Path basedir = new Path(DIR);
    Path logdir = new Path(DIR+"/logs");
    Path oldLogDir = new Path(basedir, HConstants.HREGION_OLDLOGDIR_NAME);
    HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toBytes("family"));
    FileSystem fs = FileSystem.get(conf);

    fs.delete(logdir, true);

    HTableDescriptor htd = new HTableDescriptor(Bytes.toBytes("table"));
    htd.addFamily(hcd);
    HRegionInfo info = new HRegionInfo(htd, null, null, false);
    HLog hlog = new HLog(fs, logdir, oldLogDir, conf);
    HRegion region = new HRegion(basedir, hlog, fs, conf, info, null);

    store = new Store(basedir, region, hcd, fs, conf);
  }

  // used so our tests don't deal with actual StoreFiles
  static class MockStoreFile extends StoreFile {
    long length = 0;
    boolean isRef = false;

    MockStoreFile(long length, boolean isRef) throws IOException {
      super(TEST_UTIL.getTestFileSystem(), new Path("_"), false,
            TEST_UTIL.getConfiguration(), BloomType.NONE, false);
      this.length = length;
      this.isRef = isRef;
    }

    void setLength(long newLen) {
      this.length = newLen;
    }

    @Override
    boolean isMajorCompaction() {
      return false;
    }

    @Override
    boolean isReference() {
      return this.isRef;
    }

    @Override
    public StoreFile.Reader getReader() {
      final long len = this.length;
      return new StoreFile.Reader() {
        @Override
        public long length() {
          return len;
        }
      };
    }
  }

  List<StoreFile> sfCreate(long ... sizes) throws IOException {
    return sfCreate(false, sizes);
  }

  List<StoreFile> sfCreate(boolean isReference, long ... sizes)
      throws IOException {
    List<StoreFile> ret = Lists.newArrayList();
    for (long i : sizes) {
      ret.add(new MockStoreFile(i, isReference));
    }
    return ret;
  }

  void compactEquals(List<StoreFile> actual, long ... expected)
      throws IOException {
    compactEquals(actual, false, expected);
  }

  void compactEquals(List<StoreFile> actual, boolean forcemajor,
      long ... expected)
      throws IOException {
    List<StoreFile> result = store.compactSelection(actual, forcemajor);
    long[] aNums = new long[result.size()];
    for (int i = 0; i < result.size(); ++i) {
      aNums[i] = result.get(i).getReader().length();
    }
    assertEquals(Arrays.toString(expected), Arrays.toString(aNums));
  }

  public void testCompactionRatio() throws IOException {
    /*
     * NOTE: these tests are specific to describe the implementation of the
     * current compaction algorithm.  Developed to ensure that refactoring
     * doesn't implicitly alter this.
     */
    long tooBig = maxSize + 1;

    // default case. preserve user ratio on size
    compactEquals(sfCreate(100,50,23,12,12), 23, 12, 12);
    // less than compact threshold = don't compact
    compactEquals(sfCreate(100,50,25,12,12) /* empty */);
    // greater than compact size = skip those
    compactEquals(sfCreate(tooBig, tooBig, 700, 700, 700), 700, 700, 700);
    // big size + threshold
    compactEquals(sfCreate(tooBig, tooBig, 700,700) /* empty */);
    // small files = don't care about ratio
    compactEquals(sfCreate(8,3,1), 8,3,1);
    // sort first so you don't include huge file the tail end
    // happens with HFileOutputFormat bulk migration
    compactEquals(sfCreate(100,50,23,12,12, 500), 23, 12, 12);
    // don't exceed max file compact threshold
    assertEquals(maxFiles,
      store.compactSelection(sfCreate(7,6,5,4,3,2,1), false).size());

    /* MAJOR COMPACTION */
    // if a major compaction has been forced, then compact everything
    compactEquals(sfCreate(100,50,25,12,12), true, 100, 50, 25, 12, 12);
    // also choose files < threshold on major compaction
    compactEquals(sfCreate(12,12), true, 12, 12);
    // unless one of those files is too big
    compactEquals(sfCreate(tooBig, 12,12), true, 12, 12);
    // don't exceed max file compact threshold, even with major compaction
    assertEquals(maxFiles,
      store.compactSelection(sfCreate(7,6,5,4,3,2,1), true).size());

    /* REFERENCES == file is from a region that was split */
    // treat storefiles that have references like a major compaction
    compactEquals(sfCreate(true, 100,50,25,12,12), true, 100, 50, 25, 12, 12);
    // reference files shouldn't obey max threshold
    compactEquals(sfCreate(true, tooBig, 12,12), true, tooBig, 12, 12);
    // reference files should obey max file compact to avoid OOM
    assertEquals(maxFiles,
      store.compactSelection(sfCreate(true, 7,6,5,4,3,2,1), true).size());

    // empty case
    compactEquals(new ArrayList<StoreFile>() /* empty */);
    // empty case (because all files are too big)
    compactEquals(sfCreate(tooBig, tooBig) /* empty */);
  }
}