[jira] [HBASE-5470] Make DataBlockEncodingTool work correctly with no native compression codecs loaded

Summary:
DataBlockEncodingTool was fixed as part of porting data block encoding
(HBASE-4218) to 89-fb
(https://reviews.facebook.net/rHBASEEIGHTNINEFBBRANCH1245291,
https://reviews.facebook.net/D1659). The bug fixed here appeared when GZ was
used as the baseline compression codec without the native Hadoop libraries
loaded, in which case the compressor instance would be null.

Test Plan:
Run DataBlockEncodingTool with GZ (no native codecs) and LZO (with native
codecs) as the baseline (Hadoop-level) compression codecs.

Reviewers: JIRA, Kannan, mcorgan, lhofhansl, todd, stack, tedyu

Reviewed By: tedyu

Differential Revision: https://reviews.facebook.net/D1917

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1293057 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
mbautin 2012-02-24 02:03:37 +00:00
parent 398289ce6e
commit 85c90d9274
1 changed file with 41 additions and 25 deletions

View File

@ -18,7 +18,6 @@ package org.apache.hadoop.hbase.regionserver;
import java.io.ByteArrayInputStream; import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream; import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.OutputStream; import java.io.OutputStream;
@ -74,6 +73,10 @@ public class DataBlockEncodingTool {
*/ */
public static int BENCHMARK_N_OMIT = 2; public static int BENCHMARK_N_OMIT = 2;
/** Compression algorithm to use if not specified on the command line */
private static final Algorithm DEFAULT_COMPRESSION =
Compression.Algorithm.GZ;
private List<EncodedDataBlock> codecs = new ArrayList<EncodedDataBlock>(); private List<EncodedDataBlock> codecs = new ArrayList<EncodedDataBlock>();
private int totalPrefixLength = 0; private int totalPrefixLength = 0;
private int totalKeyLength = 0; private int totalKeyLength = 0;
@ -96,6 +99,7 @@ public class DataBlockEncodingTool {
this.compressor = this.compressionAlgorithm.getCompressor(); this.compressor = this.compressionAlgorithm.getCompressor();
this.decompressor = this.compressionAlgorithm.getDecompressor(); this.decompressor = this.compressionAlgorithm.getDecompressor();
} }
/** /**
* Check statistics for given HFile for different data block encoders. * Check statistics for given HFile for different data block encoders.
* @param scanner Of file which will be compressed. * @param scanner Of file which will be compressed.
@ -431,7 +435,9 @@ public class DataBlockEncodingTool {
*/ */
public void displayStatistics() { public void displayStatistics() {
int totalLength = totalPrefixLength + totalKeyLength + totalValueLength; int totalLength = totalPrefixLength + totalKeyLength + totalValueLength;
if (compressor != null) { // might be null e.g. for pure-Java GZIP
compressor.reset(); compressor.reset();
}
for(EncodedDataBlock codec : codecs) { for(EncodedDataBlock codec : codecs) {
System.out.println(codec.toString()); System.out.println(codec.toString());
@ -445,14 +451,26 @@ public class DataBlockEncodingTool {
String.format(" Key compression ratio: %.2f %%", keyRatio)); String.format(" Key compression ratio: %.2f %%", keyRatio));
System.out.println( System.out.println(
String.format(" All compression ratio: %.2f %%", allRatio)); String.format(" All compression ratio: %.2f %%", allRatio));
String compressedSizeCaption =
String.format(" %s compressed size: ",
compressionAlgorithmName.toUpperCase());
String compressOnlyRatioCaption =
String.format(" %s compression ratio: ",
compressionAlgorithmName.toUpperCase());
if (compressor != null) {
int compressedSize = codec.checkCompressedSize(compressor); int compressedSize = codec.checkCompressedSize(compressor);
System.out.println( System.out.println(compressedSizeCaption +
String.format(" %s compressed size: %8d", String.format("%8d", compressedSize));
compressionAlgorithmName.toUpperCase(), compressedSize)); double compressOnlyRatio =
double lzoRatio = 100.0 * (1.0 - compressedSize / (0.0 + totalLength)); 100.0 * (1.0 - compressedSize / (0.0 + totalLength));
System.out.println( System.out.println(compressOnlyRatioCaption
String.format(" %s compression ratio: %.2f %%", + String.format("%.2f %%", compressOnlyRatio));
compressionAlgorithmName.toUpperCase(), lzoRatio)); } else {
System.out.println(compressedSizeCaption + "N/A");
System.out.println(compressOnlyRatioCaption + "N/A");
}
} }
System.out.println( System.out.println(
@ -475,12 +493,11 @@ public class DataBlockEncodingTool {
* @param doVerify Verify correctness. * @param doVerify Verify correctness.
* @throws IOException When pathName is incorrect. * @throws IOException When pathName is incorrect.
*/ */
public static void testCodecs(int kvLimit, String hfilePath, public static void testCodecs(Configuration conf, int kvLimit,
String compressionName, boolean doBenchmark, boolean doVerify) String hfilePath, String compressionName, boolean doBenchmark,
throws IOException { boolean doVerify) throws IOException {
// create environment // create environment
Path path = new Path(hfilePath); Path path = new Path(hfilePath);
Configuration conf = HBaseConfiguration.create();
CacheConfig cacheConf = new CacheConfig(conf); CacheConfig cacheConf = new CacheConfig(conf);
FileSystem fs = FileSystem.get(conf); FileSystem fs = FileSystem.get(conf);
StoreFile hsf = new StoreFile(fs, path, conf, cacheConf, StoreFile hsf = new StoreFile(fs, path, conf, cacheConf,
@ -564,22 +581,21 @@ public class DataBlockEncodingTool {
System.exit(-1); System.exit(-1);
} }
if (!(new File(cmd.getOptionValue("f"))).exists()) {
System.err.println(String.format("ERROR: file '%s' doesn't exist!",
cmd.getOptionValue("f")));
printUsage(options);
System.exit(-1);
}
String pathName = cmd.getOptionValue("f"); String pathName = cmd.getOptionValue("f");
String compressionName = "gz"; String compressionName = DEFAULT_COMPRESSION.getName();
if (cmd.hasOption("a")) { if (cmd.hasOption("a")) {
compressionName = cmd.getOptionValue("a"); compressionName = cmd.getOptionValue("a").toLowerCase();
} }
boolean doBenchmark = cmd.hasOption("b"); boolean doBenchmark = cmd.hasOption("b");
boolean doVerify = !cmd.hasOption("c"); boolean doVerify = !cmd.hasOption("c");
testCodecs(kvLimit, pathName, compressionName, doBenchmark, doVerify); final Configuration conf = HBaseConfiguration.create();
try {
testCodecs(conf, kvLimit, pathName, compressionName, doBenchmark,
doVerify);
} finally {
(new CacheConfig(conf)).getBlockCache().shutdown();
}
} }
} }