HADOOP-6315. Avoid incorrect use of BuiltInflater/BuiltInDeflater in
GzipCodec. Contributed by Aaron Kimball.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@898711 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
8942119b04
commit
4cecab7c10
|
@ -1295,6 +1295,8 @@ Release 0.20.2 - Unreleased
|
||||||
HADOOP-6460. Reinitializes buffers used for serializing responses in ipc
|
HADOOP-6460. Reinitializes buffers used for serializing responses in ipc
|
||||||
server on exceeding maximum response size to free up Java heap. (suresh)
|
server on exceeding maximum response size to free up Java heap. (suresh)
|
||||||
|
|
||||||
|
HADOOP-6315. Avoid incorrect use of BuiltInflater/BuiltInDeflater in
|
||||||
|
GzipCodec. (Aaron Kimball via cdouglas)
|
||||||
|
|
||||||
Release 0.20.1 - 2009-09-01
|
Release 0.20.1 - 2009-09-01
|
||||||
|
|
||||||
|
|
|
@ -165,7 +165,7 @@ public class GzipCodec extends DefaultCodec {
|
||||||
public Class<? extends Compressor> getCompressorType() {
|
public Class<? extends Compressor> getCompressorType() {
|
||||||
return ZlibFactory.isNativeZlibLoaded(conf)
|
return ZlibFactory.isNativeZlibLoaded(conf)
|
||||||
? GzipZlibCompressor.class
|
? GzipZlibCompressor.class
|
||||||
: BuiltInZlibDeflater.class;
|
: null;
|
||||||
}
|
}
|
||||||
|
|
||||||
public CompressionInputStream createInputStream(InputStream in)
|
public CompressionInputStream createInputStream(InputStream in)
|
||||||
|
@ -196,7 +196,7 @@ public class GzipCodec extends DefaultCodec {
|
||||||
public Class<? extends Decompressor> getDecompressorType() {
|
public Class<? extends Decompressor> getDecompressorType() {
|
||||||
return ZlibFactory.isNativeZlibLoaded(conf)
|
return ZlibFactory.isNativeZlibLoaded(conf)
|
||||||
? GzipZlibDecompressor.class
|
? GzipZlibDecompressor.class
|
||||||
: BuiltInZlibInflater.class;
|
: null;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getDefaultExtension() {
|
public String getDefaultExtension() {
|
||||||
|
|
|
@ -19,13 +19,24 @@ package org.apache.hadoop.io.compress;
|
||||||
|
|
||||||
import java.io.BufferedInputStream;
|
import java.io.BufferedInputStream;
|
||||||
import java.io.BufferedOutputStream;
|
import java.io.BufferedOutputStream;
|
||||||
|
import java.io.BufferedReader;
|
||||||
|
import java.io.BufferedWriter;
|
||||||
import java.io.ByteArrayInputStream;
|
import java.io.ByteArrayInputStream;
|
||||||
import java.io.ByteArrayOutputStream;
|
import java.io.ByteArrayOutputStream;
|
||||||
import java.io.DataInputStream;
|
import java.io.DataInputStream;
|
||||||
import java.io.DataOutputStream;
|
import java.io.DataOutputStream;
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileInputStream;
|
||||||
|
import java.io.FileOutputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.io.InputStreamReader;
|
||||||
|
import java.io.OutputStream;
|
||||||
|
import java.io.OutputStreamWriter;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
|
import java.util.zip.GZIPInputStream;
|
||||||
|
import java.util.zip.GZIPOutputStream;
|
||||||
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||||
|
@ -41,6 +52,9 @@ import org.apache.hadoop.io.Text;
|
||||||
import org.apache.hadoop.io.Writable;
|
import org.apache.hadoop.io.Writable;
|
||||||
import org.apache.hadoop.io.SequenceFile.CompressionType;
|
import org.apache.hadoop.io.SequenceFile.CompressionType;
|
||||||
import org.apache.hadoop.io.compress.CompressionOutputStream;
|
import org.apache.hadoop.io.compress.CompressionOutputStream;
|
||||||
|
import org.apache.hadoop.io.compress.CompressorStream;
|
||||||
|
import org.apache.hadoop.io.compress.zlib.BuiltInZlibDeflater;
|
||||||
|
import org.apache.hadoop.io.compress.zlib.BuiltInZlibInflater;
|
||||||
import org.apache.hadoop.io.compress.zlib.ZlibCompressor.CompressionLevel;
|
import org.apache.hadoop.io.compress.zlib.ZlibCompressor.CompressionLevel;
|
||||||
import org.apache.hadoop.io.compress.zlib.ZlibCompressor.CompressionStrategy;
|
import org.apache.hadoop.io.compress.zlib.ZlibCompressor.CompressionStrategy;
|
||||||
import org.apache.hadoop.io.compress.zlib.ZlibFactory;
|
import org.apache.hadoop.io.compress.zlib.ZlibFactory;
|
||||||
|
@ -418,4 +432,154 @@ public class TestCodec {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testCodecPoolAndGzipDecompressor() {
|
||||||
|
// BuiltInZlibInflater should not be used as the GzipCodec decompressor.
|
||||||
|
// Assert that this is the case.
|
||||||
|
|
||||||
|
// Don't use native libs for this test.
|
||||||
|
Configuration conf = new Configuration();
|
||||||
|
conf.setBoolean("hadoop.native.lib", false);
|
||||||
|
assertFalse("ZlibFactory is using native libs against request",
|
||||||
|
ZlibFactory.isNativeZlibLoaded(conf));
|
||||||
|
|
||||||
|
// This should give us a BuiltInZlibInflater.
|
||||||
|
Decompressor zlibDecompressor = ZlibFactory.getZlibDecompressor(conf);
|
||||||
|
assertNotNull("zlibDecompressor is null!", zlibDecompressor);
|
||||||
|
assertTrue("ZlibFactory returned unexpected inflator",
|
||||||
|
zlibDecompressor instanceof BuiltInZlibInflater);
|
||||||
|
|
||||||
|
// Asking for a decompressor directly from GzipCodec should return null;
|
||||||
|
// its createInputStream() just wraps the existing stream in a
|
||||||
|
// java.util.zip.GZIPInputStream.
|
||||||
|
CompressionCodecFactory ccf = new CompressionCodecFactory(conf);
|
||||||
|
CompressionCodec codec = ccf.getCodec(new Path("foo.gz"));
|
||||||
|
assertTrue("Codec for .gz file is not GzipCodec", codec instanceof GzipCodec);
|
||||||
|
Decompressor codecDecompressor = codec.createDecompressor();
|
||||||
|
if (null != codecDecompressor) {
|
||||||
|
fail("Got non-null codecDecompressor: " + codecDecompressor);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Asking the CodecPool for a decompressor for GzipCodec
|
||||||
|
// should return null as well.
|
||||||
|
Decompressor poolDecompressor = CodecPool.getDecompressor(codec);
|
||||||
|
if (null != poolDecompressor) {
|
||||||
|
fail("Got non-null poolDecompressor: " + poolDecompressor);
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we then ensure that the pool is populated...
|
||||||
|
CodecPool.returnDecompressor(zlibDecompressor);
|
||||||
|
|
||||||
|
// Asking the pool another time should still not bind this to GzipCodec.
|
||||||
|
poolDecompressor = CodecPool.getDecompressor(codec);
|
||||||
|
if (null != poolDecompressor) {
|
||||||
|
fail("Second time, got non-null poolDecompressor: "
|
||||||
|
+ poolDecompressor);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGzipCodecRead() throws IOException {
|
||||||
|
// Create a gzipped file and try to read it back, using a decompressor
|
||||||
|
// from the CodecPool.
|
||||||
|
|
||||||
|
// Don't use native libs for this test.
|
||||||
|
Configuration conf = new Configuration();
|
||||||
|
conf.setBoolean("hadoop.native.lib", false);
|
||||||
|
assertFalse("ZlibFactory is using native libs against request",
|
||||||
|
ZlibFactory.isNativeZlibLoaded(conf));
|
||||||
|
|
||||||
|
// Ensure that the CodecPool has a BuiltInZlibInflater in it.
|
||||||
|
Decompressor zlibDecompressor = ZlibFactory.getZlibDecompressor(conf);
|
||||||
|
assertNotNull("zlibDecompressor is null!", zlibDecompressor);
|
||||||
|
assertTrue("ZlibFactory returned unexpected inflator",
|
||||||
|
zlibDecompressor instanceof BuiltInZlibInflater);
|
||||||
|
CodecPool.returnDecompressor(zlibDecompressor);
|
||||||
|
|
||||||
|
// Now create a GZip text file.
|
||||||
|
String tmpDir = System.getProperty("test.build.data", "/tmp/");
|
||||||
|
Path f = new Path(new Path(tmpDir), "testGzipCodecRead.txt.gz");
|
||||||
|
BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(
|
||||||
|
new GZIPOutputStream(new FileOutputStream(f.toString()))));
|
||||||
|
final String msg = "This is the message in the file!";
|
||||||
|
bw.write(msg);
|
||||||
|
bw.close();
|
||||||
|
|
||||||
|
// Now read it back, using the CodecPool to establish the
|
||||||
|
// decompressor to use.
|
||||||
|
CompressionCodecFactory ccf = new CompressionCodecFactory(conf);
|
||||||
|
CompressionCodec codec = ccf.getCodec(f);
|
||||||
|
Decompressor decompressor = CodecPool.getDecompressor(codec);
|
||||||
|
FileSystem fs = FileSystem.getLocal(conf);
|
||||||
|
InputStream is = fs.open(f);
|
||||||
|
is = codec.createInputStream(is, decompressor);
|
||||||
|
BufferedReader br = new BufferedReader(new InputStreamReader(is));
|
||||||
|
String line = br.readLine();
|
||||||
|
assertEquals("Didn't get the same message back!", msg, line);
|
||||||
|
br.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void verifyGzipFile(String filename, String msg) throws IOException {
|
||||||
|
BufferedReader r = new BufferedReader(new InputStreamReader(
|
||||||
|
new GZIPInputStream(new FileInputStream(filename))));
|
||||||
|
try {
|
||||||
|
String line = r.readLine();
|
||||||
|
assertEquals("Got invalid line back from " + filename, msg, line);
|
||||||
|
} finally {
|
||||||
|
r.close();
|
||||||
|
new File(filename).delete();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGzipCodecWrite() throws IOException {
|
||||||
|
// Create a gzipped file using a compressor from the CodecPool,
|
||||||
|
// and try to read it back via the regular GZIPInputStream.
|
||||||
|
|
||||||
|
// Don't use native libs for this test.
|
||||||
|
Configuration conf = new Configuration();
|
||||||
|
conf.setBoolean("hadoop.native.lib", false);
|
||||||
|
assertFalse("ZlibFactory is using native libs against request",
|
||||||
|
ZlibFactory.isNativeZlibLoaded(conf));
|
||||||
|
|
||||||
|
// Ensure that the CodecPool has a BuiltInZlibDeflater in it.
|
||||||
|
Compressor zlibCompressor = ZlibFactory.getZlibCompressor(conf);
|
||||||
|
assertNotNull("zlibCompressor is null!", zlibCompressor);
|
||||||
|
assertTrue("ZlibFactory returned unexpected deflator",
|
||||||
|
zlibCompressor instanceof BuiltInZlibDeflater);
|
||||||
|
CodecPool.returnCompressor(zlibCompressor);
|
||||||
|
|
||||||
|
// Create a GZIP text file via the Compressor interface.
|
||||||
|
CompressionCodecFactory ccf = new CompressionCodecFactory(conf);
|
||||||
|
CompressionCodec codec = ccf.getCodec(new Path("foo.gz"));
|
||||||
|
assertTrue("Codec for .gz file is not GzipCodec", codec instanceof GzipCodec);
|
||||||
|
|
||||||
|
final String msg = "This is the message we are going to compress.";
|
||||||
|
final String tmpDir = System.getProperty("test.build.data", "/tmp/");
|
||||||
|
final String fileName = new Path(new Path(tmpDir),
|
||||||
|
"testGzipCodecWrite.txt.gz").toString();
|
||||||
|
|
||||||
|
BufferedWriter w = null;
|
||||||
|
Compressor gzipCompressor = CodecPool.getCompressor(codec);
|
||||||
|
if (null != gzipCompressor) {
|
||||||
|
// If it gives us back a Compressor, we should be able to use this
|
||||||
|
// to write files we can then read back with Java's gzip tools.
|
||||||
|
OutputStream os = new CompressorStream(new FileOutputStream(fileName),
|
||||||
|
gzipCompressor);
|
||||||
|
w = new BufferedWriter(new OutputStreamWriter(os));
|
||||||
|
w.write(msg);
|
||||||
|
w.close();
|
||||||
|
CodecPool.returnCompressor(gzipCompressor);
|
||||||
|
|
||||||
|
verifyGzipFile(fileName, msg);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create a gzip text file via codec.createOutputStream().
|
||||||
|
w = new BufferedWriter(new OutputStreamWriter(
|
||||||
|
codec.createOutputStream(new FileOutputStream(fileName))));
|
||||||
|
w.write(msg);
|
||||||
|
w.close();
|
||||||
|
|
||||||
|
verifyGzipFile(fileName, msg);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue