diff --git a/CHANGES.txt b/CHANGES.txt index d54f231a27b..d9069791a03 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1294,7 +1294,9 @@ Release 0.20.2 - Unreleased HADOOP-6460. Reinitializes buffers used for serializing responses in ipc server on exceeding maximum response size to free up Java heap. (suresh) - + + HADOOP-6315. Avoid incorrect use of BuiltInZlibInflater/BuiltInZlibDeflater in + GzipCodec. (Aaron Kimball via cdouglas) Release 0.20.1 - 2009-09-01 diff --git a/src/java/org/apache/hadoop/io/compress/GzipCodec.java b/src/java/org/apache/hadoop/io/compress/GzipCodec.java index e19cd6b375d..9a52941e120 100644 --- a/src/java/org/apache/hadoop/io/compress/GzipCodec.java +++ b/src/java/org/apache/hadoop/io/compress/GzipCodec.java @@ -165,7 +165,7 @@ public class GzipCodec extends DefaultCodec { public Class getCompressorType() { return ZlibFactory.isNativeZlibLoaded(conf) ? GzipZlibCompressor.class - : BuiltInZlibDeflater.class; + : null; } public CompressionInputStream createInputStream(InputStream in) @@ -196,7 +196,7 @@ public class GzipCodec extends DefaultCodec { public Class getDecompressorType() { return ZlibFactory.isNativeZlibLoaded(conf) ? 
GzipZlibDecompressor.class - : BuiltInZlibInflater.class; + : null; } public String getDefaultExtension() { diff --git a/src/test/core/org/apache/hadoop/io/compress/TestCodec.java b/src/test/core/org/apache/hadoop/io/compress/TestCodec.java index ae79af347e1..873a665ed7d 100644 --- a/src/test/core/org/apache/hadoop/io/compress/TestCodec.java +++ b/src/test/core/org/apache/hadoop/io/compress/TestCodec.java @@ -19,13 +19,24 @@ package org.apache.hadoop.io.compress; import java.io.BufferedInputStream; import java.io.BufferedOutputStream; +import java.io.BufferedReader; +import java.io.BufferedWriter; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.DataInputStream; import java.io.DataOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.OutputStream; +import java.io.OutputStreamWriter; import java.util.Arrays; import java.util.Random; +import java.util.zip.GZIPInputStream; +import java.util.zip.GZIPOutputStream; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; @@ -41,6 +52,9 @@ import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.SequenceFile.CompressionType; import org.apache.hadoop.io.compress.CompressionOutputStream; +import org.apache.hadoop.io.compress.CompressorStream; +import org.apache.hadoop.io.compress.zlib.BuiltInZlibDeflater; +import org.apache.hadoop.io.compress.zlib.BuiltInZlibInflater; import org.apache.hadoop.io.compress.zlib.ZlibCompressor.CompressionLevel; import org.apache.hadoop.io.compress.zlib.ZlibCompressor.CompressionStrategy; import org.apache.hadoop.io.compress.zlib.ZlibFactory; @@ -418,4 +432,154 @@ public class TestCodec { } + @Test + public void testCodecPoolAndGzipDecompressor() { + // BuiltInZlibInflater should not be used as the GzipCodec decompressor. 
+ // Assert that this is the case. + + // Don't use native libs for this test. + Configuration conf = new Configuration(); + conf.setBoolean("hadoop.native.lib", false); + assertFalse("ZlibFactory is using native libs against request", + ZlibFactory.isNativeZlibLoaded(conf)); + + // This should give us a BuiltInZlibInflater. + Decompressor zlibDecompressor = ZlibFactory.getZlibDecompressor(conf); + assertNotNull("zlibDecompressor is null!", zlibDecompressor); + assertTrue("ZlibFactory returned unexpected inflator", + zlibDecompressor instanceof BuiltInZlibInflater); + + // Asking for a decompressor directly from GzipCodec should return null; + // its createInputStream() just wraps the existing stream in a + // java.util.zip.GZIPInputStream. + CompressionCodecFactory ccf = new CompressionCodecFactory(conf); + CompressionCodec codec = ccf.getCodec(new Path("foo.gz")); + assertTrue("Codec for .gz file is not GzipCodec", codec instanceof GzipCodec); + Decompressor codecDecompressor = codec.createDecompressor(); + if (null != codecDecompressor) { + fail("Got non-null codecDecompressor: " + codecDecompressor); + } + + // Asking the CodecPool for a decompressor for GzipCodec + // should return null as well. + Decompressor poolDecompressor = CodecPool.getDecompressor(codec); + if (null != poolDecompressor) { + fail("Got non-null poolDecompressor: " + poolDecompressor); + } + + // If we then ensure that the pool is populated... + CodecPool.returnDecompressor(zlibDecompressor); + + // Asking the pool another time should still not bind this to GzipCodec. + poolDecompressor = CodecPool.getDecompressor(codec); + if (null != poolDecompressor) { + fail("Second time, got non-null poolDecompressor: " + + poolDecompressor); + } + } + + @Test + public void testGzipCodecRead() throws IOException { + // Create a gzipped file and try to read it back, using a decompressor + // from the CodecPool. + + // Don't use native libs for this test. 
+ Configuration conf = new Configuration(); + conf.setBoolean("hadoop.native.lib", false); + assertFalse("ZlibFactory is using native libs against request", + ZlibFactory.isNativeZlibLoaded(conf)); + + // Ensure that the CodecPool has a BuiltInZlibInflater in it. + Decompressor zlibDecompressor = ZlibFactory.getZlibDecompressor(conf); + assertNotNull("zlibDecompressor is null!", zlibDecompressor); + assertTrue("ZlibFactory returned unexpected inflator", + zlibDecompressor instanceof BuiltInZlibInflater); + CodecPool.returnDecompressor(zlibDecompressor); + + // Now create a GZip text file. + String tmpDir = System.getProperty("test.build.data", "/tmp/"); + Path f = new Path(new Path(tmpDir), "testGzipCodecRead.txt.gz"); + BufferedWriter bw = new BufferedWriter(new OutputStreamWriter( + new GZIPOutputStream(new FileOutputStream(f.toString())))); + final String msg = "This is the message in the file!"; + bw.write(msg); + bw.close(); + + // Now read it back, using the CodecPool to establish the + // decompressor to use. 
+ CompressionCodecFactory ccf = new CompressionCodecFactory(conf); + CompressionCodec codec = ccf.getCodec(f); + Decompressor decompressor = CodecPool.getDecompressor(codec); + FileSystem fs = FileSystem.getLocal(conf); + InputStream is = fs.open(f); + is = codec.createInputStream(is, decompressor); + BufferedReader br = new BufferedReader(new InputStreamReader(is)); + String line = br.readLine(); + assertEquals("Didn't get the same message back!", msg, line); + br.close(); + } + + private void verifyGzipFile(String filename, String msg) throws IOException { + BufferedReader r = new BufferedReader(new InputStreamReader( + new GZIPInputStream(new FileInputStream(filename)))); + try { + String line = r.readLine(); + assertEquals("Got invalid line back from " + filename, msg, line); + } finally { + r.close(); + new File(filename).delete(); + } + } + + @Test + public void testGzipCodecWrite() throws IOException { + // Create a gzipped file using a compressor from the CodecPool, + // and try to read it back via the regular GZIPInputStream. + + // Don't use native libs for this test. + Configuration conf = new Configuration(); + conf.setBoolean("hadoop.native.lib", false); + assertFalse("ZlibFactory is using native libs against request", + ZlibFactory.isNativeZlibLoaded(conf)); + + // Ensure that the CodecPool has a BuiltInZlibDeflater in it. + Compressor zlibCompressor = ZlibFactory.getZlibCompressor(conf); + assertNotNull("zlibCompressor is null!", zlibCompressor); + assertTrue("ZlibFactory returned unexpected deflator", + zlibCompressor instanceof BuiltInZlibDeflater); + CodecPool.returnCompressor(zlibCompressor); + + // Create a GZIP text file via the Compressor interface. 
+ CompressionCodecFactory ccf = new CompressionCodecFactory(conf); + CompressionCodec codec = ccf.getCodec(new Path("foo.gz")); + assertTrue("Codec for .gz file is not GzipCodec", codec instanceof GzipCodec); + + final String msg = "This is the message we are going to compress."; + final String tmpDir = System.getProperty("test.build.data", "/tmp/"); + final String fileName = new Path(new Path(tmpDir), + "testGzipCodecWrite.txt.gz").toString(); + + BufferedWriter w = null; + Compressor gzipCompressor = CodecPool.getCompressor(codec); + if (null != gzipCompressor) { + // If it gives us back a Compressor, we should be able to use this + // to write files we can then read back with Java's gzip tools. + OutputStream os = new CompressorStream(new FileOutputStream(fileName), + gzipCompressor); + w = new BufferedWriter(new OutputStreamWriter(os)); + w.write(msg); + w.close(); + CodecPool.returnCompressor(gzipCompressor); + + verifyGzipFile(fileName, msg); + } + + // Create a gzip text file via codec.createOutputStream(). + w = new BufferedWriter(new OutputStreamWriter( + codec.createOutputStream(new FileOutputStream(fileName)))); + w.write(msg); + w.close(); + + verifyGzipFile(fileName, msg); + } }