HADOOP-6315. Avoid incorrect use of BuiltInflater/BuiltInDeflater in
GzipCodec. Contributed by Aaron Kimball.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@898711 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
8942119b04
commit
4cecab7c10
|
@ -1294,7 +1294,9 @@ Release 0.20.2 - Unreleased
|
|||
|
||||
HADOOP-6460. Reinitializes buffers used for serializing responses in ipc
|
||||
server on exceeding maximum response size to free up Java heap. (suresh)
|
||||
|
||||
|
||||
HADOOP-6315. Avoid incorrect use of BuiltInflater/BuiltInDeflater in
|
||||
GzipCodec. (Aaron Kimball via cdouglas)
|
||||
|
||||
Release 0.20.1 - 2009-09-01
|
||||
|
||||
|
|
|
@ -165,7 +165,7 @@ public class GzipCodec extends DefaultCodec {
|
|||
/**
 * Reports which Compressor class this codec uses, chosen by whether
 * ZlibFactory.isNativeZlibLoaded(conf) is true.
 *
 * @return GzipZlibCompressor.class when native zlib is loaded; see the
 *         notes on the fallback branch below for the other case.
 */
public Class<? extends Compressor> getCompressorType() {
|
||||
return ZlibFactory.isNativeZlibLoaded(conf)
|
||||
? GzipZlibCompressor.class
|
||||
// NOTE(review): this is a rendered diff hunk with the +/- markers lost, so
// both versions of the fallback branch appear. Judging by the commit message
// ("Avoid incorrect use of BuiltInflater/BuiltInDeflater in GzipCodec"), the
// BuiltInZlibDeflater line is presumably the removed one and ": null;" the
// added one -- confirm against the raw diff.
: BuiltInZlibDeflater.class;
|
||||
: null;
|
||||
}
|
||||
|
||||
public CompressionInputStream createInputStream(InputStream in)
|
||||
|
@ -196,7 +196,7 @@ public class GzipCodec extends DefaultCodec {
|
|||
/**
 * Reports which Decompressor class this codec uses, chosen by whether
 * ZlibFactory.isNativeZlibLoaded(conf) is true.
 *
 * @return GzipZlibDecompressor.class when native zlib is loaded; see the
 *         notes on the fallback branch below for the other case.
 */
public Class<? extends Decompressor> getDecompressorType() {
|
||||
return ZlibFactory.isNativeZlibLoaded(conf)
|
||||
? GzipZlibDecompressor.class
|
||||
// NOTE(review): this is a rendered diff hunk with the +/- markers lost, so
// both versions of the fallback branch appear. Judging by the commit message
// ("Avoid incorrect use of BuiltInflater/BuiltInDeflater in GzipCodec"), the
// BuiltInZlibInflater line is presumably the removed one and ": null;" the
// added one -- confirm against the raw diff.
: BuiltInZlibInflater.class;
|
||||
: null;
|
||||
}
|
||||
|
||||
public String getDefaultExtension() {
|
||||
|
|
|
@ -19,13 +19,24 @@ package org.apache.hadoop.io.compress;
|
|||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.BufferedOutputStream;
|
||||
import java.io.BufferedReader;
|
||||
import java.io.BufferedWriter;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.DataInputStream;
|
||||
import java.io.DataOutputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.OutputStream;
|
||||
import java.io.OutputStreamWriter;
|
||||
import java.util.Arrays;
|
||||
import java.util.Random;
|
||||
import java.util.zip.GZIPInputStream;
|
||||
import java.util.zip.GZIPOutputStream;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
|
@ -41,6 +52,9 @@ import org.apache.hadoop.io.Text;
|
|||
import org.apache.hadoop.io.Writable;
|
||||
import org.apache.hadoop.io.SequenceFile.CompressionType;
|
||||
import org.apache.hadoop.io.compress.CompressionOutputStream;
|
||||
import org.apache.hadoop.io.compress.CompressorStream;
|
||||
import org.apache.hadoop.io.compress.zlib.BuiltInZlibDeflater;
|
||||
import org.apache.hadoop.io.compress.zlib.BuiltInZlibInflater;
|
||||
import org.apache.hadoop.io.compress.zlib.ZlibCompressor.CompressionLevel;
|
||||
import org.apache.hadoop.io.compress.zlib.ZlibCompressor.CompressionStrategy;
|
||||
import org.apache.hadoop.io.compress.zlib.ZlibFactory;
|
||||
|
@ -418,4 +432,154 @@ public class TestCodec {
|
|||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCodecPoolAndGzipDecompressor() {
|
||||
// BuiltInZlibInflater should not be used as the GzipCodec decompressor.
|
||||
// Assert that this is the case.
|
||||
|
||||
// Don't use native libs for this test.
|
||||
Configuration conf = new Configuration();
|
||||
conf.setBoolean("hadoop.native.lib", false);
|
||||
assertFalse("ZlibFactory is using native libs against request",
|
||||
ZlibFactory.isNativeZlibLoaded(conf));
|
||||
|
||||
// This should give us a BuiltInZlibInflater.
|
||||
Decompressor zlibDecompressor = ZlibFactory.getZlibDecompressor(conf);
|
||||
assertNotNull("zlibDecompressor is null!", zlibDecompressor);
|
||||
assertTrue("ZlibFactory returned unexpected inflator",
|
||||
zlibDecompressor instanceof BuiltInZlibInflater);
|
||||
|
||||
// Asking for a decompressor directly from GzipCodec should return null;
|
||||
// its createOutputStream() just wraps the existing stream in a
|
||||
// java.util.zip.GZIPOutputStream.
|
||||
CompressionCodecFactory ccf = new CompressionCodecFactory(conf);
|
||||
CompressionCodec codec = ccf.getCodec(new Path("foo.gz"));
|
||||
assertTrue("Codec for .gz file is not GzipCodec", codec instanceof GzipCodec);
|
||||
Decompressor codecDecompressor = codec.createDecompressor();
|
||||
if (null != codecDecompressor) {
|
||||
fail("Got non-null codecDecompressor: " + codecDecompressor);
|
||||
}
|
||||
|
||||
// Asking the CodecPool for a decompressor for GzipCodec
|
||||
// should return null as well.
|
||||
Decompressor poolDecompressor = CodecPool.getDecompressor(codec);
|
||||
if (null != poolDecompressor) {
|
||||
fail("Got non-null poolDecompressor: " + poolDecompressor);
|
||||
}
|
||||
|
||||
// If we then ensure that the pool is populated...
|
||||
CodecPool.returnDecompressor(zlibDecompressor);
|
||||
|
||||
// Asking the pool another time should still not bind this to GzipCodec.
|
||||
poolDecompressor = CodecPool.getDecompressor(codec);
|
||||
if (null != poolDecompressor) {
|
||||
fail("Second time, got non-null poolDecompressor: "
|
||||
+ poolDecompressor);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGzipCodecRead() throws IOException {
|
||||
// Create a gzipped file and try to read it back, using a decompressor
|
||||
// from the CodecPool.
|
||||
|
||||
// Don't use native libs for this test.
|
||||
Configuration conf = new Configuration();
|
||||
conf.setBoolean("hadoop.native.lib", false);
|
||||
assertFalse("ZlibFactory is using native libs against request",
|
||||
ZlibFactory.isNativeZlibLoaded(conf));
|
||||
|
||||
// Ensure that the CodecPool has a BuiltInZlibInflater in it.
|
||||
Decompressor zlibDecompressor = ZlibFactory.getZlibDecompressor(conf);
|
||||
assertNotNull("zlibDecompressor is null!", zlibDecompressor);
|
||||
assertTrue("ZlibFactory returned unexpected inflator",
|
||||
zlibDecompressor instanceof BuiltInZlibInflater);
|
||||
CodecPool.returnDecompressor(zlibDecompressor);
|
||||
|
||||
// Now create a GZip text file.
|
||||
String tmpDir = System.getProperty("test.build.data", "/tmp/");
|
||||
Path f = new Path(new Path(tmpDir), "testGzipCodecRead.txt.gz");
|
||||
BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(
|
||||
new GZIPOutputStream(new FileOutputStream(f.toString()))));
|
||||
final String msg = "This is the message in the file!";
|
||||
bw.write(msg);
|
||||
bw.close();
|
||||
|
||||
// Now read it back, using the CodecPool to establish the
|
||||
// decompressor to use.
|
||||
CompressionCodecFactory ccf = new CompressionCodecFactory(conf);
|
||||
CompressionCodec codec = ccf.getCodec(f);
|
||||
Decompressor decompressor = CodecPool.getDecompressor(codec);
|
||||
FileSystem fs = FileSystem.getLocal(conf);
|
||||
InputStream is = fs.open(f);
|
||||
is = codec.createInputStream(is, decompressor);
|
||||
BufferedReader br = new BufferedReader(new InputStreamReader(is));
|
||||
String line = br.readLine();
|
||||
assertEquals("Didn't get the same message back!", msg, line);
|
||||
br.close();
|
||||
}
|
||||
|
||||
private void verifyGzipFile(String filename, String msg) throws IOException {
|
||||
BufferedReader r = new BufferedReader(new InputStreamReader(
|
||||
new GZIPInputStream(new FileInputStream(filename))));
|
||||
try {
|
||||
String line = r.readLine();
|
||||
assertEquals("Got invalid line back from " + filename, msg, line);
|
||||
} finally {
|
||||
r.close();
|
||||
new File(filename).delete();
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGzipCodecWrite() throws IOException {
|
||||
// Create a gzipped file using a compressor from the CodecPool,
|
||||
// and try to read it back via the regular GZIPInputStream.
|
||||
|
||||
// Don't use native libs for this test.
|
||||
Configuration conf = new Configuration();
|
||||
conf.setBoolean("hadoop.native.lib", false);
|
||||
assertFalse("ZlibFactory is using native libs against request",
|
||||
ZlibFactory.isNativeZlibLoaded(conf));
|
||||
|
||||
// Ensure that the CodecPool has a BuiltInZlibDeflater in it.
|
||||
Compressor zlibCompressor = ZlibFactory.getZlibCompressor(conf);
|
||||
assertNotNull("zlibCompressor is null!", zlibCompressor);
|
||||
assertTrue("ZlibFactory returned unexpected deflator",
|
||||
zlibCompressor instanceof BuiltInZlibDeflater);
|
||||
CodecPool.returnCompressor(zlibCompressor);
|
||||
|
||||
// Create a GZIP text file via the Compressor interface.
|
||||
CompressionCodecFactory ccf = new CompressionCodecFactory(conf);
|
||||
CompressionCodec codec = ccf.getCodec(new Path("foo.gz"));
|
||||
assertTrue("Codec for .gz file is not GzipCodec", codec instanceof GzipCodec);
|
||||
|
||||
final String msg = "This is the message we are going to compress.";
|
||||
final String tmpDir = System.getProperty("test.build.data", "/tmp/");
|
||||
final String fileName = new Path(new Path(tmpDir),
|
||||
"testGzipCodecWrite.txt.gz").toString();
|
||||
|
||||
BufferedWriter w = null;
|
||||
Compressor gzipCompressor = CodecPool.getCompressor(codec);
|
||||
if (null != gzipCompressor) {
|
||||
// If it gives us back a Compressor, we should be able to use this
|
||||
// to write files we can then read back with Java's gzip tools.
|
||||
OutputStream os = new CompressorStream(new FileOutputStream(fileName),
|
||||
gzipCompressor);
|
||||
w = new BufferedWriter(new OutputStreamWriter(os));
|
||||
w.write(msg);
|
||||
w.close();
|
||||
CodecPool.returnCompressor(gzipCompressor);
|
||||
|
||||
verifyGzipFile(fileName, msg);
|
||||
}
|
||||
|
||||
// Create a gzip text file via codec.getOutputStream().
|
||||
w = new BufferedWriter(new OutputStreamWriter(
|
||||
codec.createOutputStream(new FileOutputStream(fileName))));
|
||||
w.write(msg);
|
||||
w.close();
|
||||
|
||||
verifyGzipFile(fileName, msg);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue