HBASE-25756 Support alternate compression for major and minor compactions (#3142)

Signed-off-by: Duo Zhang <zhangduo@apache.org>
This commit is contained in:
Andrew Purtell 2021-04-23 15:45:26 -07:00 committed by GitHub
parent 96fefce9c3
commit 9895b2dfdf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 111 additions and 36 deletions

View File

@ -94,10 +94,22 @@ public interface ColumnFamilyDescriptor {
* @return Compression type setting.
*/
Compression.Algorithm getCompactionCompressionType();
/**
 * Returns the compression algorithm to use for files written by major compactions.
 * @return Compression type setting for major compactions.
 */
Compression.Algorithm getMajorCompactionCompressionType();
/**
 * Returns the compression algorithm to use for files written by minor compactions.
 * @return Compression type setting for minor compactions.
 */
Compression.Algorithm getMinorCompactionCompressionType();
/**
 * Returns the compression algorithm configured on this column family descriptor.
 * @return Compression type setting.
 */
Compression.Algorithm getCompressionType();
/**
* @return an unmodifiable map.
*/

View File

@ -75,6 +75,10 @@ public class ColumnFamilyDescriptorBuilder {
@InterfaceAudience.Private
public static final String COMPRESSION_COMPACT = "COMPRESSION_COMPACT";
private static final Bytes COMPRESSION_COMPACT_BYTES = new Bytes(Bytes.toBytes(COMPRESSION_COMPACT));
// Attribute name under which the compression algorithm for major compactions is stored.
public static final String COMPRESSION_COMPACT_MAJOR = "COMPRESSION_COMPACT_MAJOR";
// Byte-wrapped form of COMPRESSION_COMPACT_MAJOR, used as the key by the descriptor's getter and setter.
private static final Bytes COMPRESSION_COMPACT_MAJOR_BYTES = new Bytes(Bytes.toBytes(COMPRESSION_COMPACT_MAJOR));
// Attribute name under which the compression algorithm for minor compactions is stored.
public static final String COMPRESSION_COMPACT_MINOR = "COMPRESSION_COMPACT_MINOR";
// Byte-wrapped form of COMPRESSION_COMPACT_MINOR, used as the key by the descriptor's getter and setter.
private static final Bytes COMPRESSION_COMPACT_MINOR_BYTES = new Bytes(Bytes.toBytes(COMPRESSION_COMPACT_MINOR));
@InterfaceAudience.Private
public static final String DATA_BLOCK_ENCODING = "DATA_BLOCK_ENCODING";
private static final Bytes DATA_BLOCK_ENCODING_BYTES = new Bytes(Bytes.toBytes(DATA_BLOCK_ENCODING));
@ -449,6 +453,16 @@ public class ColumnFamilyDescriptorBuilder {
return this;
}
/**
 * Sets the compression algorithm for major compactions on the descriptor being built.
 * @param value compression algorithm, forwarded unchanged to the underlying descriptor
 * @return this builder, so calls can be chained
 */
public ColumnFamilyDescriptorBuilder setMajorCompactionCompressionType(
    Compression.Algorithm value) {
  desc.setMajorCompactionCompressionType(value);
  return this;
}
/**
 * Sets the compression algorithm for minor compactions on the descriptor being built.
 * @param value compression algorithm, forwarded unchanged to the underlying descriptor
 * @return this builder, so calls can be chained
 */
public ColumnFamilyDescriptorBuilder setMinorCompactionCompressionType(
    Compression.Algorithm value) {
  desc.setMinorCompactionCompressionType(value);
  return this;
}
public ColumnFamilyDescriptorBuilder setCompressTags(boolean value) {
desc.setCompressTags(value);
return this;
@ -839,6 +853,18 @@ public class ColumnFamilyDescriptorBuilder {
n -> Compression.Algorithm.valueOf(n.toUpperCase()), getCompressionType());
}
/**
 * Returns the compression algorithm for major compactions.
 * <p>
 * The string stored under {@code COMPRESSION_COMPACT_MAJOR} is upper-cased and resolved to a
 * {@link Compression.Algorithm} constant; {@link #getCompactionCompressionType()} is supplied
 * as the default value.
 * @return compression type setting for major compactions
 * @throws IllegalArgumentException if the stored name matches no {@code Compression.Algorithm}
 */
@Override
public Compression.Algorithm getMajorCompactionCompressionType() {
  // Upper-case with Locale.ROOT so the enum lookup does not depend on the JVM default locale:
  // under a Turkish locale "i" upper-cases to a dotted capital I, which valueOf would reject.
  return getStringOrDefault(COMPRESSION_COMPACT_MAJOR_BYTES,
    n -> Compression.Algorithm.valueOf(n.toUpperCase(java.util.Locale.ROOT)),
    getCompactionCompressionType());
}
/**
 * Returns the compression algorithm for minor compactions.
 * <p>
 * The string stored under {@code COMPRESSION_COMPACT_MINOR} is upper-cased and resolved to a
 * {@link Compression.Algorithm} constant; {@link #getCompactionCompressionType()} is supplied
 * as the default value.
 * @return compression type setting for minor compactions
 * @throws IllegalArgumentException if the stored name matches no {@code Compression.Algorithm}
 */
@Override
public Compression.Algorithm getMinorCompactionCompressionType() {
  // Upper-case with Locale.ROOT so the enum lookup does not depend on the JVM default locale:
  // under a Turkish locale "i" upper-cases to a dotted capital I, which valueOf would reject.
  return getStringOrDefault(COMPRESSION_COMPACT_MINOR_BYTES,
    n -> Compression.Algorithm.valueOf(n.toUpperCase(java.util.Locale.ROOT)),
    getCompactionCompressionType());
}
/**
* Compression types supported in hbase. LZO is not bundled as part of the
* hbase distribution. See
@ -853,6 +879,16 @@ public class ColumnFamilyDescriptorBuilder {
return setValue(COMPRESSION_COMPACT_BYTES, type.name());
}
/**
 * Stores the compression algorithm for major compactions under the
 * {@code COMPRESSION_COMPACT_MAJOR} key.
 * @param type compression algorithm; its enum constant name is the stored value
 * @return whatever {@code setValue} returns for that key
 */
public ModifyableColumnFamilyDescriptor setMajorCompactionCompressionType(
    Compression.Algorithm type) {
  final String algorithmName = type.name();
  return setValue(COMPRESSION_COMPACT_MAJOR_BYTES, algorithmName);
}
/**
 * Stores the compression algorithm for minor compactions under the
 * {@code COMPRESSION_COMPACT_MINOR} key.
 * @param type compression algorithm; its enum constant name is the stored value
 * @return whatever {@code setValue} returns for that key
 */
public ModifyableColumnFamilyDescriptor setMinorCompactionCompressionType(
    Compression.Algorithm type) {
  final String algorithmName = type.name();
  return setValue(COMPRESSION_COMPACT_MINOR_BYTES, algorithmName);
}
@Override
public boolean isInMemory() {
return getStringOrDefault(IN_MEMORY_BYTES, Boolean::valueOf, DEFAULT_IN_MEMORY);

View File

@ -147,10 +147,11 @@ public class DefaultMobStoreCompactor extends DefaultCompactor {
@Override
public StoreFileWriter createWriter(InternalScanner scanner,
org.apache.hadoop.hbase.regionserver.compactions.Compactor.FileDetails fd,
boolean shouldDropBehind) throws IOException {
boolean shouldDropBehind, boolean major) throws IOException {
// make this writer with tags always because of possible new cells with tags.
return store.createWriterInTmp(fd.maxKeyCount, compactionCompression, true, true, true,
shouldDropBehind);
return store.createWriterInTmp(fd.maxKeyCount,
major ? majorCompactionCompression : minorCompactionCompression,
true, true, true, shouldDropBehind);
}
};
@ -350,7 +351,7 @@ public class DefaultMobStoreCompactor extends DefaultCompactor {
Cell mobCell = null;
try {
mobFileWriter = newMobWriter(fd);
mobFileWriter = newMobWriter(fd, major);
fileName = Bytes.toBytes(mobFileWriter.getPath().getName());
do {
@ -428,7 +429,7 @@ public class DefaultMobStoreCompactor extends DefaultCompactor {
LOG.debug("Closing output MOB File, length={} file={}, store={}", len,
mobFileWriter.getPath().getName(), getStoreInfo());
commitOrAbortMobWriter(mobFileWriter, fd.maxSeqId, mobCells, major);
mobFileWriter = newMobWriter(fd);
mobFileWriter = newMobWriter(fd, major);
fileName = Bytes.toBytes(mobFileWriter.getPath().getName());
mobCells = 0;
}
@ -472,7 +473,7 @@ public class DefaultMobStoreCompactor extends DefaultCompactor {
long len = mobFileWriter.getPos();
if (len > maxMobFileSize) {
commitOrAbortMobWriter(mobFileWriter, fd.maxSeqId, mobCells, major);
mobFileWriter = newMobWriter(fd);
mobFileWriter = newMobWriter(fd, major);
fileName = Bytes.toBytes(mobFileWriter.getPath().getName());
mobCells = 0;
}
@ -524,7 +525,7 @@ public class DefaultMobStoreCompactor extends DefaultCompactor {
long len = mobFileWriter.getPos();
if (len > maxMobFileSize) {
commitOrAbortMobWriter(mobFileWriter, fd.maxSeqId, mobCells, major);
mobFileWriter = newMobWriter(fd);
mobFileWriter = newMobWriter(fd, major);
fileName = Bytes.toBytes(mobFileWriter.getPath().getName());
mobCells = 0;
}
@ -611,11 +612,12 @@ public class DefaultMobStoreCompactor extends DefaultCompactor {
}
}
private StoreFileWriter newMobWriter(FileDetails fd)
private StoreFileWriter newMobWriter(FileDetails fd, boolean major)
throws IOException {
try {
StoreFileWriter mobFileWriter = mobStore.createWriterInTmp(new Date(fd.latestPutTs),
fd.maxKeyCount, compactionCompression, store.getRegionInfo().getStartKey(), true);
fd.maxKeyCount, major ? majorCompactionCompression : minorCompactionCompression,
store.getRegionInfo().getStartKey(), true);
LOG.debug("New MOB writer created={} store={}", mobFileWriter.getPath().getName(),
getStoreInfo());
// Add reference we get for compact MOB

View File

@ -47,17 +47,17 @@ public abstract class AbstractMultiOutputCompactor<T extends AbstractMultiFileWr
}
protected void initMultiWriter(AbstractMultiFileWriter writer, InternalScanner scanner,
final FileDetails fd, final boolean shouldDropBehind) {
final FileDetails fd, final boolean shouldDropBehind, boolean major) {
WriterFactory writerFactory = new WriterFactory() {
@Override
public StoreFileWriter createWriter() throws IOException {
return createTmpWriter(fd, shouldDropBehind);
return createTmpWriter(fd, shouldDropBehind, major);
}
@Override
public StoreFileWriter createWriterWithStoragePolicy(String fileStoragePolicy)
throws IOException {
return createTmpWriter(fd, shouldDropBehind, fileStoragePolicy);
return createTmpWriter(fd, shouldDropBehind, fileStoragePolicy, major);
}
};
// Prepare multi-writer, and perform the compaction using scanner and writer.

View File

@ -76,7 +76,8 @@ public abstract class Compactor<T extends CellSink> {
protected final HStore store;
protected final int compactionKVMax;
protected final Compression.Algorithm compactionCompression;
protected final Compression.Algorithm majorCompactionCompression;
protected final Compression.Algorithm minorCompactionCompression;
/** specify how many days to keep MVCC values during major compaction **/
protected int keepSeqIdPeriod;
@ -96,8 +97,10 @@ public abstract class Compactor<T extends CellSink> {
this.store = store;
this.compactionKVMax =
this.conf.getInt(HConstants.COMPACTION_KV_MAX, HConstants.COMPACTION_KV_MAX_DEFAULT);
this.compactionCompression = (this.store.getColumnFamilyDescriptor() == null) ?
Compression.Algorithm.NONE : this.store.getColumnFamilyDescriptor().getCompactionCompressionType();
this.majorCompactionCompression = (store.getColumnFamilyDescriptor() == null) ?
Compression.Algorithm.NONE : store.getColumnFamilyDescriptor().getMajorCompactionCompressionType();
this.minorCompactionCompression = (store.getColumnFamilyDescriptor() == null) ?
Compression.Algorithm.NONE : store.getColumnFamilyDescriptor().getMinorCompactionCompressionType();
this.keepSeqIdPeriod = Math.max(this.conf.getInt(HConstants.KEEP_SEQID_PERIOD,
HConstants.MIN_KEEP_SEQID_PERIOD), HConstants.MIN_KEEP_SEQID_PERIOD);
this.dropCacheMajor = conf.getBoolean(MAJOR_COMPACTION_DROP_CACHE, true);
@ -107,7 +110,7 @@ public abstract class Compactor<T extends CellSink> {
protected interface CellSinkFactory<S> {
S createWriter(InternalScanner scanner, FileDetails fd, boolean shouldDropBehind)
S createWriter(InternalScanner scanner, FileDetails fd, boolean shouldDropBehind, boolean major)
throws IOException;
}
@ -139,10 +142,11 @@ public abstract class Compactor<T extends CellSink> {
* Extracts some details about the files to compact that are commonly needed by compactors.
* @param filesToCompact Files.
* @param allFiles Whether all files are included for compaction
* @param major If major compaction
* @return The result.
*/
private FileDetails getFileDetails(
Collection<HStoreFile> filesToCompact, boolean allFiles) throws IOException {
Collection<HStoreFile> filesToCompact, boolean allFiles, boolean major) throws IOException {
FileDetails fd = new FileDetails();
long oldestHFileTimestampToKeepMVCC = System.currentTimeMillis() -
(1000L * 60 * 60 * 24 * this.keepSeqIdPeriod);
@ -212,7 +216,7 @@ public abstract class Compactor<T extends CellSink> {
r.getBloomFilterType().toString(),
TraditionalBinaryPrefix.long2String(r.length(), "", 1),
r.getHFileReader().getDataBlockEncoding(),
compactionCompression,
major ? majorCompactionCompression : minorCompactionCompression,
seqNum,
(allFiles? ", earliestPutTs=" + earliestPutTs: ""));
}
@ -263,21 +267,23 @@ public abstract class Compactor<T extends CellSink> {
* @return Writer for a new StoreFile in the tmp dir.
* @throws IOException if creation failed
*/
protected final StoreFileWriter createTmpWriter(FileDetails fd, boolean shouldDropBehind)
protected final StoreFileWriter createTmpWriter(FileDetails fd, boolean shouldDropBehind, boolean major)
throws IOException {
// When all MVCC readpoints are 0, don't write them.
// See HBASE-8166, HBASE-12600, and HBASE-13389.
return store
.createWriterInTmp(fd.maxKeyCount, this.compactionCompression, true, fd.maxMVCCReadpoint > 0,
return store.createWriterInTmp(fd.maxKeyCount,
major ? majorCompactionCompression : minorCompactionCompression,
true, fd.maxMVCCReadpoint > 0,
fd.maxTagsLength > 0, shouldDropBehind, fd.totalCompactedFilesSize,
HConstants.EMPTY_STRING);
}
protected final StoreFileWriter createTmpWriter(FileDetails fd, boolean shouldDropBehind,
String fileStoragePolicy) throws IOException {
return store
.createWriterInTmp(fd.maxKeyCount, this.compactionCompression, true, fd.maxMVCCReadpoint > 0,
fd.maxTagsLength > 0, shouldDropBehind, fd.totalCompactedFilesSize, fileStoragePolicy);
String fileStoragePolicy, boolean major) throws IOException {
return store.createWriterInTmp(fd.maxKeyCount,
major ? majorCompactionCompression : minorCompactionCompression,
true, fd.maxMVCCReadpoint > 0,
fd.maxTagsLength > 0, shouldDropBehind, fd.totalCompactedFilesSize, fileStoragePolicy);
}
private ScanInfo preCompactScannerOpen(CompactionRequestImpl request, ScanType scanType,
@ -308,7 +314,7 @@ public abstract class Compactor<T extends CellSink> {
protected final List<Path> compact(final CompactionRequestImpl request,
InternalScannerFactory scannerFactory, CellSinkFactory<T> sinkFactory,
ThroughputController throughputController, User user) throws IOException {
FileDetails fd = getFileDetails(request.getFiles(), request.isAllFiles());
FileDetails fd = getFileDetails(request.getFiles(), request.isAllFiles(), request.isMajor());
this.progress = new CompactionProgress(fd.maxKeyCount);
// Find the smallest read point across all the Scanners.
@ -338,7 +344,7 @@ public abstract class Compactor<T extends CellSink> {
smallestReadPoint = Math.min(fd.minSeqIdToKeep, smallestReadPoint);
cleanSeqId = true;
}
writer = sinkFactory.createWriter(scanner, fd, dropCache);
writer = sinkFactory.createWriter(scanner, fd, dropCache, request.isMajor());
finished = performCompaction(fd, scanner, writer, smallestReadPoint, cleanSeqId,
throughputController, request.isAllFiles(), request.getFiles().size());
if (!finished) {

View File

@ -68,11 +68,11 @@ public class DateTieredCompactor extends AbstractMultiOutputCompactor<DateTiered
@Override
public DateTieredMultiFileWriter createWriter(InternalScanner scanner, FileDetails fd,
boolean shouldDropBehind) throws IOException {
boolean shouldDropBehind, boolean major) throws IOException {
DateTieredMultiFileWriter writer = new DateTieredMultiFileWriter(lowerBoundaries,
lowerBoundariesPolicies,
needEmptyFile(request));
initMultiWriter(writer, scanner, fd, shouldDropBehind);
initMultiWriter(writer, scanner, fd, shouldDropBehind, major);
return writer;
}
}, throughputController, user);

View File

@ -52,8 +52,8 @@ public class DefaultCompactor extends Compactor<StoreFileWriter> {
@Override
public StoreFileWriter createWriter(InternalScanner scanner,
org.apache.hadoop.hbase.regionserver.compactions.Compactor.FileDetails fd,
boolean shouldDropBehind) throws IOException {
return createTmpWriter(fd, shouldDropBehind);
boolean shouldDropBehind, boolean major) throws IOException {
return createTmpWriter(fd, shouldDropBehind, major);
}
};

View File

@ -93,10 +93,10 @@ public class StripeCompactor extends AbstractMultiOutputCompactor<StripeMultiFil
@Override
public StripeMultiFileWriter createWriter(InternalScanner scanner, FileDetails fd,
boolean shouldDropBehind) throws IOException {
boolean shouldDropBehind, boolean major) throws IOException {
StripeMultiFileWriter writer = new StripeMultiFileWriter.BoundaryMultiWriter(
store.getComparator(), targetBoundaries, majorRangeFromRow, majorRangeToRow);
initMultiWriter(writer, scanner, fd, shouldDropBehind);
initMultiWriter(writer, scanner, fd, shouldDropBehind, major);
return writer;
}
}, throughputController, user);
@ -115,10 +115,10 @@ public class StripeCompactor extends AbstractMultiOutputCompactor<StripeMultiFil
@Override
public StripeMultiFileWriter createWriter(InternalScanner scanner, FileDetails fd,
boolean shouldDropBehind) throws IOException {
boolean shouldDropBehind, boolean major) throws IOException {
StripeMultiFileWriter writer = new StripeMultiFileWriter.SizeMultiWriter(
store.getComparator(), targetCount, targetSize, left, right);
initMultiWriter(writer, scanner, fd, shouldDropBehind);
initMultiWriter(writer, scanner, fd, shouldDropBehind, major);
return writer;
}
}, throughputController, user);

View File

@ -293,6 +293,8 @@ public final class TableDescriptorChecker {
for (ColumnFamilyDescriptor cfd : td.getColumnFamilies()) {
CompressionTest.testCompression(cfd.getCompressionType());
CompressionTest.testCompression(cfd.getCompactionCompressionType());
CompressionTest.testCompression(cfd.getMajorCompactionCompressionType());
CompressionTest.testCompression(cfd.getMinorCompactionCompressionType());
}
}

View File

@ -156,7 +156,8 @@ public class FaultyMobStoreCompactor extends DefaultMobStoreCompactor {
try {
try {
mobFileWriter = mobStore.createWriterInTmp(new Date(fd.latestPutTs), fd.maxKeyCount,
compactionCompression, store.getRegionInfo().getStartKey(), true);
major ? majorCompactionCompression : minorCompactionCompression,
store.getRegionInfo().getStartKey(), true);
fileName = Bytes.toBytes(mobFileWriter.getPath().getName());
} catch (IOException e) {
// Bailing out

View File

@ -1152,6 +1152,22 @@ module Hbase
raise(ArgumentError, "Compression #{compression} is not supported. Use one of " + org.apache.hadoop.hbase.io.compress.Compression::Algorithm.constants.join(' '))
end
end
# Optional override of the compression algorithm used for major compactions.
if arg.include?(ColumnFamilyDescriptorBuilder::COMPRESSION_COMPACT_MAJOR)
  compression = arg.delete(ColumnFamilyDescriptorBuilder::COMPRESSION_COMPACT_MAJOR).upcase.to_sym
  # Reject names that are not constants of Compression::Algorithm before touching the builder.
  unless org.apache.hadoop.hbase.io.compress.Compression::Algorithm.constants.include?(compression)
    raise(ArgumentError, "Compression #{compression} is not supported. Use one of " + org.apache.hadoop.hbase.io.compress.Compression::Algorithm.constants.join(' '))
  end
  cfdb.setMajorCompactionCompressionType(org.apache.hadoop.hbase.io.compress.Compression::Algorithm.valueOf(compression))
end
# Optional override of the compression algorithm used for minor compactions.
if arg.include?(ColumnFamilyDescriptorBuilder::COMPRESSION_COMPACT_MINOR)
  compression = arg.delete(ColumnFamilyDescriptorBuilder::COMPRESSION_COMPACT_MINOR).upcase.to_sym
  # Reject names that are not constants of Compression::Algorithm before touching the builder.
  unless org.apache.hadoop.hbase.io.compress.Compression::Algorithm.constants.include?(compression)
    raise(ArgumentError, "Compression #{compression} is not supported. Use one of " + org.apache.hadoop.hbase.io.compress.Compression::Algorithm.constants.join(' '))
  end
  cfdb.setMinorCompactionCompressionType(org.apache.hadoop.hbase.io.compress.Compression::Algorithm.valueOf(compression))
end
if arg.include?(ColumnFamilyDescriptorBuilder::STORAGE_POLICY)
storage_policy = arg.delete(ColumnFamilyDescriptorBuilder::STORAGE_POLICY).upcase
cfdb.setStoragePolicy(storage_policy)