From 8d0c1b62af1e5f3206f63951b563ca4e3afaf381 Mon Sep 17 00:00:00 2001
From: Doron Cohen <doronc@apache.org>
Date: Thu, 24 Mar 2011 12:22:13 +0000
Subject: [PATCH] LUCENE-2977: WriteLineDocTask should write gzip/bzip2/txt
 according to the extension of specified output file name.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1084929 13f79535-47bb-0310-9956-ffa450edef68
---
 modules/benchmark/CHANGES.txt                 |   5 +
 .../benchmark/byTask/feeds/ContentSource.java |  78 ----------
 .../byTask/feeds/EnwikiContentSource.java     |   5 +-
 .../benchmark/byTask/feeds/LineDocSource.java |   5 +-
 .../byTask/feeds/TrecContentSource.java       |   5 +-
 .../byTask/tasks/WriteLineDocTask.java        |  44 ++----
 .../benchmark/byTask/utils/StreamUtils.java   | 144 ++++++++++++++++++
 .../byTask/feeds/LineDocSourceTest.java       |  38 ++---
 .../byTask/tasks/WriteLineDocTaskTest.java    |  60 ++++----
 .../StreamUtilsTest.java}                     |  96 +++++++-----
 10 files changed, 278 insertions(+), 202 deletions(-)
 create mode 100644 modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/StreamUtils.java
 rename modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/{feeds/ContentSourceTest.java => utils/StreamUtilsTest.java} (55%)

diff --git a/modules/benchmark/CHANGES.txt b/modules/benchmark/CHANGES.txt
index 12ce6b9ac51..00b6a5134c6 100644
--- a/modules/benchmark/CHANGES.txt
+++ b/modules/benchmark/CHANGES.txt
@@ -2,6 +2,11 @@ Lucene Benchmark Contrib Change Log
 
 The Benchmark contrib package contains code for benchmarking Lucene in a variety of ways.
 
+03/24/2011
+  LUCENE-2977: WriteLineDocTask now automatically detects how to write -
+  GZip or BZip2 or Plain-text - according to the output file extension.
+  Property bzip.compression of WriteLineDocTask was removed. (Doron Cohen)
+  
 03/23/2011
   LUCENE-2980: Benchmark's ContentSource no more requires lower case file suffixes 
   for detecting file type (gzip/bzip2/text). As part of this fix worked around an 
diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java
index 4af7dde7623..cfe377c3ffe 100644
--- a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java
+++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java
@@ -17,19 +17,11 @@ package org.apache.lucene.benchmark.byTask.feeds;
  * limitations under the License.
  */
 
-import java.io.BufferedInputStream;
 import java.io.File;
-import java.io.FileInputStream;
 import java.io.IOException;
-import java.io.InputStream;
 import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.HashMap;
-import java.util.Locale;
-import java.util.Map;
 
-import org.apache.commons.compress.compressors.CompressorException;
-import org.apache.commons.compress.compressors.CompressorStreamFactory;
 import org.apache.lucene.benchmark.byTask.utils.Config;
 
 /**
@@ -56,17 +48,6 @@ import org.apache.lucene.benchmark.byTask.utils.Config;
  */
 public abstract class ContentSource {
   
-  private static final Map<String,String> extensionToType = new HashMap<String,String>();
-  static {
-  	// these in are lower case, we will lower case at the test as well
-    extensionToType.put(".bz2", CompressorStreamFactory.BZIP2);
-    extensionToType.put(".bzip", CompressorStreamFactory.BZIP2);
-    extensionToType.put(".gz", CompressorStreamFactory.GZIP);
-    extensionToType.put(".gzip", CompressorStreamFactory.GZIP);
-  }
-  
-  protected static final int BUFFER_SIZE = 1 << 16; // 64K
-
   private long bytesCount;
   private long totalBytesCount;
   private int docsCount;
@@ -78,8 +59,6 @@ public abstract class ContentSource {
   protected boolean verbose;
   protected String encoding;
   
-  private CompressorStreamFactory csFactory = new CompressorStreamFactory();
-
   /** update count of bytes generated by this source */  
   protected final synchronized void addBytes(long numBytes) {
     bytesCount += numBytes;
@@ -114,63 +93,6 @@ public abstract class ContentSource {
     }
   }
 
-  /**
-   * Returns an {@link InputStream} over the requested file. This method
-   * attempts to identify the appropriate {@link InputStream} instance to return
-   * based on the file name (e.g., if it ends with .bz2 or .bzip, return a
-   * 'bzip' {@link InputStream}).
-   */
-  protected InputStream getInputStream(File file) throws IOException {
-    // First, create a FileInputStream, as this will be required by all types.
-    // Wrap with BufferedInputStream for better performance
-    InputStream is = new BufferedInputStream(new FileInputStream(file), BUFFER_SIZE);
-    
-    String fileName = file.getName();
-    int idx = fileName.lastIndexOf('.');
-    String type = null;
-    if (idx != -1) {
-      type = extensionToType.get(fileName.substring(idx).toLowerCase(Locale.ENGLISH));
-    }
-    
-    if (type!=null) { // bzip or gzip
-    	try {
-    		return closableCompressorInputStream(type,is);
-    	} catch (CompressorException e) {
-    		IOException ioe = new IOException(e.getMessage());
-    		ioe.initCause(e);
-    		throw ioe;
-    	}
-    } 
-    
-    return is;
-  }
-  
-  /**
-   * Wrap the compressor input stream so that calling close will also close
-   * the underlying stream - workaround for CommonsCompress bug (COMPRESS-127). 
-   */
-  private InputStream closableCompressorInputStream(String type, final InputStream is) throws CompressorException {
-    final InputStream delegee = csFactory.createCompressorInputStream(type, is);
-    if (!type.equals(CompressorStreamFactory.GZIP)) {
-    	return delegee; //compressor bug affects only gzip
-    }
-    return new InputStream() {
-			@Override	public int read() throws IOException { return delegee.read();	}
-			@Override	public int read(byte[] b) throws IOException { return delegee.read(b);	}
-			@Override	public int available() throws IOException {	return delegee.available();	}
-			@Override	public synchronized void mark(int readlimit) { delegee.mark(readlimit);	}
-			@Override	public boolean markSupported() { return delegee.markSupported(); }
-			@Override	public int read(byte[] b, int off, int len) throws IOException { return delegee.read(b, off, len); }
-			@Override	public synchronized void reset() throws IOException {	delegee.reset(); }
-			@Override	public long skip(long n) throws IOException {	return delegee.skip(n);	}
-			@Override	
-			public void close() throws IOException { 
-				delegee.close();
-				is.close();
-			}
-    };
-	}
-
 	/**
    * Returns true whether it's time to log a message (depending on verbose and
    * the number of documents generated).
diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSource.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSource.java
index 5c71c5a4024..5153ad0c4eb 100644
--- a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSource.java
+++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSource.java
@@ -24,6 +24,7 @@ import java.util.HashMap;
 import java.util.Map;
 
 import org.apache.lucene.benchmark.byTask.utils.Config;
+import org.apache.lucene.benchmark.byTask.utils.StreamUtils;
 import org.apache.lucene.util.ThreadInterruptedException;
 import org.xml.sax.Attributes;
 import org.xml.sax.InputSource;
@@ -189,7 +190,7 @@ public class EnwikiContentSource extends ContentSource {
               return;
             } else if (localFileIS == is) {
               // If file is not already re-opened then re-open it now
-              is = getInputStream(file);
+              is = StreamUtils.inputStream(file);
             }
           }
         }
@@ -290,7 +291,7 @@ public class EnwikiContentSource extends ContentSource {
   @Override
   public void resetInputs() throws IOException {
     super.resetInputs();
-    is = getInputStream(file);
+    is = StreamUtils.inputStream(file);
   }
   
   @Override
diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LineDocSource.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LineDocSource.java
index 443fac5337a..ecc0aed42c1 100644
--- a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LineDocSource.java
+++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LineDocSource.java
@@ -28,6 +28,7 @@ import java.util.Properties;
 
 import org.apache.lucene.benchmark.byTask.tasks.WriteLineDocTask;
 import org.apache.lucene.benchmark.byTask.utils.Config;
+import org.apache.lucene.benchmark.byTask.utils.StreamUtils;
 
 /**
  * A {@link ContentSource} reading one line at a time as a
@@ -178,8 +179,8 @@ public class LineDocSource extends ContentSource {
       if (reader != null) {
         reader.close();
       }
-      InputStream is = getInputStream(file);
-      reader = new BufferedReader(new InputStreamReader(is, encoding), BUFFER_SIZE);
+      InputStream is = StreamUtils.inputStream(file);
+      reader = new BufferedReader(new InputStreamReader(is, encoding), StreamUtils.BUFFER_SIZE);
       if (skipHeaderLine) {
         reader.readLine(); // skip one line - the header line - already handled that info
       }
diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java
index d60a12ccf90..3069c27463f 100644
--- a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java
+++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java
@@ -32,6 +32,7 @@ import java.util.Locale;
 
 import org.apache.lucene.benchmark.byTask.feeds.TrecDocParser.ParsePathType;
 import org.apache.lucene.benchmark.byTask.utils.Config;
+import org.apache.lucene.benchmark.byTask.utils.StreamUtils;
 import org.apache.lucene.benchmark.byTask.utils.StringBuilderReader;
 import org.apache.lucene.util.ThreadInterruptedException;
 
@@ -194,8 +195,8 @@ public class TrecContentSource extends ContentSource {
         System.out.println("opening: " + f + " length: " + f.length());
       }
       try {
-        InputStream inputStream = getInputStream(f); // support either gzip, bzip2, or regular text file, by extension  
-        reader = new BufferedReader(new InputStreamReader(inputStream, encoding), BUFFER_SIZE);
+        InputStream inputStream = StreamUtils.inputStream(f); // support either gzip, bzip2, or regular text file, by extension  
+        reader = new BufferedReader(new InputStreamReader(inputStream, encoding), StreamUtils.BUFFER_SIZE);
         currPathType = TrecDocParser.pathType(f);
         return;
       } catch (Exception e) {
diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.java
index 3369e30e3dc..197fe45c153 100644
--- a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.java
+++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.java
@@ -17,9 +17,8 @@ package org.apache.lucene.benchmark.byTask.tasks;
  * limitations under the License.
  */
 
-import java.io.BufferedOutputStream;
 import java.io.BufferedWriter;
-import java.io.FileOutputStream;
+import java.io.File;
 import java.io.OutputStream;
 import java.io.OutputStreamWriter;
 import java.io.PrintWriter;
@@ -28,10 +27,10 @@ import java.util.HashSet;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
-import org.apache.commons.compress.compressors.CompressorStreamFactory;
 import org.apache.lucene.benchmark.byTask.PerfRunData;
 import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
 import org.apache.lucene.benchmark.byTask.utils.Config;
+import org.apache.lucene.benchmark.byTask.utils.StreamUtils;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 
@@ -40,14 +39,17 @@ import org.apache.lucene.document.Field;
  * following format: title &lt;TAB&gt; date &lt;TAB&gt; body. The output of this
  * task can be consumed by
  * {@link org.apache.lucene.benchmark.byTask.feeds.LineDocSource} and is intended
- * to save the IO overhead of opening a file per document to be indexed.<br>
+ * to save the IO overhead of opening a file per document to be indexed.
+ * <p>
+ * The format of the output is set according to the output file extension.
+ * Compression is recommended when the output file is expected to be large.
+ * See info on file extensions in {@link StreamUtils.Type}
+ * <p> 
  * Supports the following parameters:
  * <ul>
- * <li><b>line.file.out<b> - the name of the file to write the output to. That
+ * <li><b>line.file.out</b> - the name of the file to write the output to. That
  * parameter is mandatory. <b>NOTE:</b> the file is re-created.
- * <li><b>bzip.compression<b> - whether the output should be bzip-compressed. This is
- * recommended when the output file is expected to be large. 
- * <li><b>line.fields<b> - which fields should be written in each line.
+ * <li><b>line.fields</b> - which fields should be written in each line.
  * (optional, default: {@link #DEFAULT_FIELDS}).
  * <li><b>sufficient.fields</b> - list of field names, separated by comma, which, 
  * if all of them are missing, the document will be skipped. For example, to require 
@@ -91,30 +93,12 @@ public class WriteLineDocTask extends PerfTask {
   public WriteLineDocTask(PerfRunData runData) throws Exception {
     super(runData);
     Config config = runData.getConfig();
-    String fileName = config.get("line.file.out", null);
-    if (fileName == null) {
+    String fname = config.get("line.file.out", null);
+    if (fname == null) {
       throw new IllegalArgumentException("line.file.out must be set");
     }
-
-    OutputStream out = new FileOutputStream(fileName);
-    boolean doBzipCompression = false;
-    String doBZCompress = config.get("bzip.compression", null);
-    if (doBZCompress != null) {
-      // Property was set, use the value.
-      doBzipCompression = Boolean.valueOf(doBZCompress).booleanValue();
-    } else {
-      // Property was not set, attempt to detect based on file's extension
-      doBzipCompression = fileName.endsWith("bz2");
-    }
-
-    if (doBzipCompression) {
-      // Wrap with BOS since BZip2CompressorOutputStream calls out.write(int) 
-      // and does not use the write(byte[]) version. This proved to speed the 
-      // compression process by 70% !
-      out = new BufferedOutputStream(out, 1 << 16);
-      out = new CompressorStreamFactory().createCompressorOutputStream("bzip2", out);
-    }
-    lineFileOut = new PrintWriter(new BufferedWriter(new OutputStreamWriter(out, "UTF-8"), 1 << 16));
+    OutputStream out = StreamUtils.outputStream(new File(fname));
+    lineFileOut = new PrintWriter(new BufferedWriter(new OutputStreamWriter(out, "UTF-8"), StreamUtils.BUFFER_SIZE));
     docMaker = runData.getDocMaker();
     
     // init fields 
diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/StreamUtils.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/StreamUtils.java
new file mode 100644
index 00000000000..b6f8d674d54
--- /dev/null
+++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/StreamUtils.java
@@ -0,0 +1,144 @@
+package org.apache.lucene.benchmark.byTask.utils;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.HashMap;
+import java.util.Locale;
+import java.util.Map;
+
+import org.apache.commons.compress.compressors.CompressorException;
+import org.apache.commons.compress.compressors.CompressorStreamFactory;
+
+/**
+ * Stream utilities.
+ */
+public class StreamUtils {
+
+	/** Buffer size used across the benchmark package */
+	public static final int BUFFER_SIZE = 1 << 16; // 64K
+	
+	/** File format type, selected according to the file name extension. */
+	public enum Type {
+		/** BZIP2 is automatically used for <b>.bz2</b> and <b>.bzip</b> extensions. */
+		BZIP2(CompressorStreamFactory.BZIP2),
+		/** GZIP is automatically used for <b>.gz</b> and <b>.gzip</b> extensions. */
+		GZIP(CompressorStreamFactory.GZIP),
+		/** Plain text is used for anything which is not GZIP or BZIP. */
+		PLAIN(null);
+		private final String csfType; // CompressorStreamFactory codec name, null for plain text
+		Type(String csfType) { this.csfType = csfType; }
+		private InputStream inputStream(InputStream in) throws IOException {
+			try { // plain text passes through untouched, anything else gets a decompressor
+				return csfType==null ? in : closableCompressorInputStream(this, in);
+			} catch (CompressorException e) {
+				throw asIOException(e);
+			}
+		}
+		private OutputStream outputStream(OutputStream os) throws IOException {
+			try { // plain text passes through untouched, anything else gets a compressor
+				return csfType==null ? os : new CompressorStreamFactory().createCompressorOutputStream(csfType, os);
+			} catch (CompressorException e) {
+				throw asIOException(e);
+			}
+		}
+		private static IOException asIOException(CompressorException e) {
+			IOException ioe = new IOException(e.getMessage()); // same message, original kept as cause
+			ioe.initCause(e);
+			return ioe;
+		}
+	}
+	
+  private static final Map<String,Type> extensionToType = new HashMap<String,Type>();
+  static {
+  	// these are in lower case; fileType() lower-cases the extension before the lookup
+    extensionToType.put(".bz2", Type.BZIP2);
+    extensionToType.put(".bzip", Type.BZIP2);
+    extensionToType.put(".gz", Type.GZIP);
+    extensionToType.put(".gzip", Type.GZIP);
+  }
+  
+  
+  /**
+   * Returns an {@link InputStream} over the requested file. This method
+   * attempts to identify the appropriate {@link InputStream} instance to return
+   * based on the file name (e.g., if it ends with .bz2 or .bzip, return a
+   * 'bzip' {@link InputStream}).
+   */
+  public static InputStream inputStream(File file) throws IOException {
+    // Every type reads from the file itself; wrap with BufferedInputStream for better performance.
+    InputStream in = new BufferedInputStream(new FileInputStream(file), BUFFER_SIZE);
+    try { return fileType(file).inputStream(in); }
+    catch (IOException e) { in.close(); throw e; } // decompressor setup failed: do not leak the open file
+  }
+
+  /** Return the type of the file by its (case-insensitive) extension, or {@link Type#PLAIN} if unknown */
+  private static Type fileType(File file) {
+    String fileName = file.getName();
+    int idx = fileName.lastIndexOf('.');
+    if (idx == -1) {
+      return Type.PLAIN; // no extension at all
+    }
+    Type type = extensionToType.get(fileName.substring(idx).toLowerCase(Locale.ENGLISH));
+    return type==null ? Type.PLAIN : type;
+  }
+  
+  /**
+   * Wrap the compressor input stream so that calling close will also close
+   * the underlying stream - workaround for CommonsCompress bug (COMPRESS-127). 
+   */
+  private static InputStream closableCompressorInputStream(Type type, final InputStream is) throws CompressorException {
+    final InputStream delegee = new CompressorStreamFactory().createCompressorInputStream(type.csfType, is);
+    if (!Type.GZIP.equals(type)) {
+      return delegee; //compressor bug affects only gzip
+    }
+    return new InputStream() { // pure delegation, except that close() also closes the underlying stream
+			@Override	public int read() throws IOException { return delegee.read();	}
+			@Override	public int read(byte[] b) throws IOException { return delegee.read(b);	}
+			@Override	public int available() throws IOException {	return delegee.available();	}
+			@Override	public synchronized void mark(int readlimit) { delegee.mark(readlimit);	}
+			@Override	public boolean markSupported() { return delegee.markSupported(); }
+			@Override	public int read(byte[] b, int off, int len) throws IOException { return delegee.read(b, off, len); }
+			@Override	public synchronized void reset() throws IOException {	delegee.reset(); }
+			@Override	public long skip(long n) throws IOException {	return delegee.skip(n);	}
+			@Override	
+			public void close() throws IOException { 
+				try { delegee.close(); }
+				finally { is.close(); } // release the underlying stream even if the decompressor's close fails
+			}
+    };
+	}
+
+  /**
+   * Returns an {@link OutputStream} over the requested file, identifying
+   * the appropriate {@link OutputStream} instance similar to {@link #inputStream(File)}.
+   */
+  public static OutputStream outputStream(File file) throws IOException {
+    // Every type writes to the file itself. Buffer it: the bzip2 compressor writes via write(int), so this speeds it up a lot.
+    OutputStream os = new BufferedOutputStream(new FileOutputStream(file), BUFFER_SIZE);
+    try { return fileType(file).outputStream(os); }
+    catch (IOException e) { os.close(); throw e; } // compressor setup failed: do not leak the open file
+  }
+}
diff --git a/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocSourceTest.java b/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocSourceTest.java
index 39672deb6db..7cc7dc0da2d 100644
--- a/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocSourceTest.java
+++ b/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocSourceTest.java
@@ -103,23 +103,19 @@ public class LineDocSourceTest extends BenchmarkTestCase {
     writer.close();
   }
   
-  private void doIndexAndSearchTest(File file, boolean setBZCompress,
-      String bz2CompressVal, Class<? extends LineParser> lineParserClass, String storedField) throws Exception {
-    doIndexAndSearchTestWithRepeats(file, setBZCompress, bz2CompressVal, lineParserClass, 1, storedField); // no extra repetitions
-    doIndexAndSearchTestWithRepeats(file, setBZCompress, bz2CompressVal, lineParserClass, 2, storedField); // 1 extra repetition
-    doIndexAndSearchTestWithRepeats(file, setBZCompress, bz2CompressVal, lineParserClass, 4, storedField); // 3 extra repetitions
+  private void doIndexAndSearchTest(File file, Class<? extends LineParser> lineParserClass, String storedField) throws Exception {
+    doIndexAndSearchTestWithRepeats(file, lineParserClass, 1, storedField); // no extra repetitions
+    doIndexAndSearchTestWithRepeats(file, lineParserClass, 2, storedField); // 1 extra repetition
+    doIndexAndSearchTestWithRepeats(file, lineParserClass, 4, storedField); // 3 extra repetitions
   }
   
-  private void doIndexAndSearchTestWithRepeats(File file, boolean setBZCompress,
-      String bz2CompressVal, Class<? extends LineParser> lineParserClass, int numAdds, String storedField) throws Exception {
+  private void doIndexAndSearchTestWithRepeats(File file, 
+      Class<? extends LineParser> lineParserClass, int numAdds, String storedField) throws Exception {
 
     Properties props = new Properties();
     
     // LineDocSource specific settings.
     props.setProperty("docs.file", file.getAbsolutePath());
-    if (setBZCompress) {
-      props.setProperty("bzip.compression", bz2CompressVal);
-    }
     if (lineParserClass != null) {
       props.setProperty("line.parser", lineParserClass.getName());
     }
@@ -160,37 +156,31 @@ public class LineDocSourceTest extends BenchmarkTestCase {
   public void testBZip2() throws Exception {
     File file = new File(getWorkDir(), "one-line.bz2");
     createBZ2LineFile(file,true);
-    doIndexAndSearchTest(file, true, "true", null, null);
+    doIndexAndSearchTest(file, null, null);
   }
 
   public void testBZip2NoHeaderLine() throws Exception {
     File file = new File(getWorkDir(), "one-line.bz2");
     createBZ2LineFile(file,false);
-    doIndexAndSearchTest(file, true, "true", null, null);
-  }
-  
-  public void testBZip2AutoDetect() throws Exception {
-    File file = new File(getWorkDir(), "one-line.bz2");
-    createBZ2LineFile(file,false);
-    doIndexAndSearchTest(file, false, null, null, null);
+    doIndexAndSearchTest(file, null, null);
   }
   
   public void testRegularFile() throws Exception {
     File file = new File(getWorkDir(), "one-line");
     createRegularLineFile(file,true);
-    doIndexAndSearchTest(file, false, null, null, null);
+    doIndexAndSearchTest(file, null, null);
   }
 
   public void testRegularFileSpecialHeader() throws Exception {
     File file = new File(getWorkDir(), "one-line");
     createRegularLineFile(file,true);
-    doIndexAndSearchTest(file, false, null, HeaderLineParser.class, null);
+    doIndexAndSearchTest(file, HeaderLineParser.class, null);
   }
 
   public void testRegularFileNoHeaderLine() throws Exception {
     File file = new File(getWorkDir(), "one-line");
     createRegularLineFile(file,false);
-    doIndexAndSearchTest(file, false, null, null, null);
+    doIndexAndSearchTest(file, null, null);
   }
 
   public void testInvalidFormat() throws Exception {
@@ -210,7 +200,7 @@ public class LineDocSourceTest extends BenchmarkTestCase {
       writer.newLine();
       writer.close();
       try {
-        doIndexAndSearchTest(file, false, null, null, null);
+        doIndexAndSearchTest(file, null, null);
         fail("Some exception should have been thrown for: [" + testCases[i] + "]");
       } catch (Exception e) {
         // expected.
@@ -222,7 +212,7 @@ public class LineDocSourceTest extends BenchmarkTestCase {
   public void testWithDocsName()  throws Exception {
     File file = new File(getWorkDir(), "one-line");
     createRegularLineFileWithMoreFields(file, DocMaker.NAME_FIELD);
-    doIndexAndSearchTest(file, false, null, null, DocMaker.NAME_FIELD);
+    doIndexAndSearchTest(file, null, DocMaker.NAME_FIELD);
   }
 
   /** Use fields names that are not defined in Docmaker and so will go to Properties */
@@ -230,7 +220,7 @@ public class LineDocSourceTest extends BenchmarkTestCase {
     File file = new File(getWorkDir(), "one-line");
     String specialField = "mySpecialField";
     createRegularLineFileWithMoreFields(file, specialField);
-    doIndexAndSearchTest(file, false, null, null, specialField);
+    doIndexAndSearchTest(file, null, specialField);
   }
   
 }
diff --git a/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTaskTest.java b/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTaskTest.java
index 908c069e800..09e055229cd 100644
--- a/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTaskTest.java
+++ b/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTaskTest.java
@@ -31,6 +31,7 @@ import org.apache.lucene.benchmark.BenchmarkTestCase;
 import org.apache.lucene.benchmark.byTask.PerfRunData;
 import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
 import org.apache.lucene.benchmark.byTask.utils.Config;
+import org.apache.lucene.benchmark.byTask.utils.StreamUtils.Type;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.Field.Index;
@@ -135,16 +136,12 @@ public class WriteLineDocTaskTest extends BenchmarkTestCase {
 
   private static final CompressorStreamFactory csFactory = new CompressorStreamFactory();
 
-  private PerfRunData createPerfRunData(File file, boolean setBZCompress,
+  private PerfRunData createPerfRunData(File file, 
                                         boolean allowEmptyDocs,
-                                        String bz2CompressVal,
                                         String docMakerName) throws Exception {
     Properties props = new Properties();
     props.setProperty("doc.maker", docMakerName);
     props.setProperty("line.file.out", file.getAbsolutePath());
-    if (setBZCompress) {
-      props.setProperty("bzip.compression", bz2CompressVal);
-    }
     props.setProperty("directory", "RAMDirectory"); // no accidental FS dir.
     if (allowEmptyDocs) {
       props.setProperty("sufficient.fields", ",");
@@ -157,11 +154,19 @@ public class WriteLineDocTaskTest extends BenchmarkTestCase {
     return new PerfRunData(config);
   }
   
-  private void doReadTest(File file, boolean bz2File, String expTitle,
+  private void doReadTest(File file, Type fileType, String expTitle,
                           String expDate, String expBody) throws Exception {
     InputStream in = new FileInputStream(file);
-    if (bz2File) {
-      in = csFactory.createCompressorInputStream("bzip2", in);
+    switch(fileType) {
+    	case BZIP2:
+    		in = csFactory.createCompressorInputStream(CompressorStreamFactory.BZIP2, in);
+    		break;
+    	case GZIP:
+    		in = csFactory.createCompressorInputStream(CompressorStreamFactory.GZIP, in);
+    		break; // explicit break: do not rely on falling through into PLAIN
+    	case PLAIN: break; // nothing to do
+    	default:
+    		fail("Unknown file type!"); // should not happen
+    }
     BufferedReader br = new BufferedReader(new InputStreamReader(in, "utf-8"));
     try {
@@ -192,36 +197,37 @@ public class WriteLineDocTaskTest extends BenchmarkTestCase {
     
     // Create a document in bz2 format.
     File file = new File(getWorkDir(), "one-line.bz2");
-    PerfRunData runData = createPerfRunData(file, true, false, "true", WriteLineDocMaker.class.getName());
+    PerfRunData runData = createPerfRunData(file, false, WriteLineDocMaker.class.getName());
     WriteLineDocTask wldt = new WriteLineDocTask(runData);
     wldt.doLogic();
     wldt.close();
     
-    doReadTest(file, true, "title", "date", "body");
+    doReadTest(file, Type.BZIP2, "title", "date", "body");
   }
   
-  public void testBZip2AutoDetect() throws Exception {
+  /* Tests WriteLineDocTask with a gzip format. */
+  public void testGZip() throws Exception {
     
-    // Create a document in bz2 format.
-    File file = new File(getWorkDir(), "one-line.bz2");
-    PerfRunData runData = createPerfRunData(file, false, false, null, WriteLineDocMaker.class.getName());
+    // Create a document in gz format.
+    File file = new File(getWorkDir(), "one-line.gz");
+    PerfRunData runData = createPerfRunData(file, false, WriteLineDocMaker.class.getName());
     WriteLineDocTask wldt = new WriteLineDocTask(runData);
     wldt.doLogic();
     wldt.close();
     
-    doReadTest(file, true, "title", "date", "body");
+    doReadTest(file, Type.GZIP, "title", "date", "body");
   }
   
   public void testRegularFile() throws Exception {
     
     // Create a document in regular format.
     File file = new File(getWorkDir(), "one-line");
-    PerfRunData runData = createPerfRunData(file, true, false, "false", WriteLineDocMaker.class.getName());
+    PerfRunData runData = createPerfRunData(file, false, WriteLineDocMaker.class.getName());
     WriteLineDocTask wldt = new WriteLineDocTask(runData);
     wldt.doLogic();
     wldt.close();
     
-    doReadTest(file, false, "title", "date", "body");
+    doReadTest(file, Type.PLAIN, "title", "date", "body");
   }
 
   public void testCharsReplace() throws Exception {
@@ -229,12 +235,12 @@ public class WriteLineDocTaskTest extends BenchmarkTestCase {
     // separator char. However, it didn't replace newline characters, which
     // resulted in errors in LineDocSource.
     File file = new File(getWorkDir(), "one-line");
-    PerfRunData runData = createPerfRunData(file, false, false, null, NewLinesDocMaker.class.getName());
+    PerfRunData runData = createPerfRunData(file, false, NewLinesDocMaker.class.getName());
     WriteLineDocTask wldt = new WriteLineDocTask(runData);
     wldt.doLogic();
     wldt.close();
     
-    doReadTest(file, false, "title text", "date text", "body text two");
+    doReadTest(file, Type.PLAIN, "title text", "date text", "body text two");
   }
   
   public void testEmptyBody() throws Exception {
@@ -242,28 +248,28 @@ public class WriteLineDocTaskTest extends BenchmarkTestCase {
     // had a TITLE element (LUCENE-1755). It should throw away documents if they
     // don't have BODY nor TITLE
     File file = new File(getWorkDir(), "one-line");
-    PerfRunData runData = createPerfRunData(file, false, false, null, NoBodyDocMaker.class.getName());
+    PerfRunData runData = createPerfRunData(file, false, NoBodyDocMaker.class.getName());
     WriteLineDocTask wldt = new WriteLineDocTask(runData);
     wldt.doLogic();
     wldt.close();
     
-    doReadTest(file, false, "title", "date", null);
+    doReadTest(file, Type.PLAIN, "title", "date", null);
   }
   
   public void testEmptyTitle() throws Exception {
     File file = new File(getWorkDir(), "one-line");
-    PerfRunData runData = createPerfRunData(file, false, false, null, NoTitleDocMaker.class.getName());
+    PerfRunData runData = createPerfRunData(file, false, NoTitleDocMaker.class.getName());
     WriteLineDocTask wldt = new WriteLineDocTask(runData);
     wldt.doLogic();
     wldt.close();
     
-    doReadTest(file, false, "", "date", "body");
+    doReadTest(file, Type.PLAIN, "", "date", "body");
   }
   
   /** Fail by default when there's only date */
   public void testJustDate() throws Exception {
     File file = new File(getWorkDir(), "one-line");
-    PerfRunData runData = createPerfRunData(file, false, false, null, JustDateDocMaker.class.getName());
+    PerfRunData runData = createPerfRunData(file, false, JustDateDocMaker.class.getName());
     WriteLineDocTask wldt = new WriteLineDocTask(runData);
     wldt.doLogic();
     wldt.close();
@@ -281,7 +287,7 @@ public class WriteLineDocTaskTest extends BenchmarkTestCase {
 
   public void testLegalJustDate() throws Exception {
     File file = new File(getWorkDir(), "one-line");
-    PerfRunData runData = createPerfRunData(file, false, false, null, LegalJustDateDocMaker.class.getName());
+    PerfRunData runData = createPerfRunData(file, false, LegalJustDateDocMaker.class.getName());
     WriteLineDocTask wldt = new WriteLineDocTask(runData);
     wldt.doLogic();
     wldt.close();
@@ -299,7 +305,7 @@ public class WriteLineDocTaskTest extends BenchmarkTestCase {
 
   public void testEmptyDoc() throws Exception {
     File file = new File(getWorkDir(), "one-line");
-    PerfRunData runData = createPerfRunData(file, false, true, null, EmptyDocMaker.class.getName());
+    PerfRunData runData = createPerfRunData(file, true, EmptyDocMaker.class.getName());
     WriteLineDocTask wldt = new WriteLineDocTask(runData);
     wldt.doLogic();
     wldt.close();
@@ -317,7 +323,7 @@ public class WriteLineDocTaskTest extends BenchmarkTestCase {
 
   public void testMultiThreaded() throws Exception {
     File file = new File(getWorkDir(), "one-line");
-    PerfRunData runData = createPerfRunData(file, false, false, null, ThreadingDocMaker.class.getName());
+    PerfRunData runData = createPerfRunData(file, false, ThreadingDocMaker.class.getName());
     final WriteLineDocTask wldt = new WriteLineDocTask(runData);
     Thread[] threads = new Thread[10];
     for (int i = 0; i < threads.length; i++) {
diff --git a/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/ContentSourceTest.java b/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/utils/StreamUtilsTest.java
similarity index 55%
rename from modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/ContentSourceTest.java
rename to modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/utils/StreamUtilsTest.java
index 6ba647a1721..110812b1fe3 100644
--- a/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/ContentSourceTest.java
+++ b/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/utils/StreamUtilsTest.java
@@ -1,4 +1,4 @@
-package org.apache.lucene.benchmark.byTask.feeds;
+package org.apache.lucene.benchmark.byTask.utils;
 
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -30,39 +30,63 @@ import java.io.OutputStreamWriter;
 
 import org.apache.commons.compress.compressors.CompressorStreamFactory;
 import org.apache.lucene.benchmark.BenchmarkTestCase;
+import org.apache.lucene.benchmark.byTask.utils.StreamUtils;
 import org.apache.lucene.util._TestUtil;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
 
-public class ContentSourceTest extends BenchmarkTestCase {
+public class StreamUtilsTest extends BenchmarkTestCase {
   private static final String TEXT = "Some-Text..."; 
   private File testDir;
-  private CompressorStreamFactory csFactory = new CompressorStreamFactory();
   
   @Test
   public void testGetInputStreamPlainText() throws Exception {
-    assertReadText(textFile("txt"));
-    assertReadText(textFile("TXT"));
+    assertReadText(rawTextFile("txt"));
+    assertReadText(rawTextFile("TXT"));
   }
 
   @Test
   public void testGetInputStreamGzip() throws Exception {
-    assertReadText(gzipFile("gz"));
-    assertReadText(gzipFile("gzip"));
-    assertReadText(gzipFile("GZ"));
-    assertReadText(gzipFile("GZIP"));
+    assertReadText(rawGzipFile("gz"));
+    assertReadText(rawGzipFile("gzip"));
+    assertReadText(rawGzipFile("GZ"));
+    assertReadText(rawGzipFile("GZIP"));
   }
 
   @Test
   public void testGetInputStreamBzip2() throws Exception {
-  	assertReadText(bzip2File("bz2"));
-  	assertReadText(bzip2File("bzip"));
-  	assertReadText(bzip2File("BZ2"));
-  	assertReadText(bzip2File("BZIP"));
+  	assertReadText(rawBzip2File("bz2"));
+  	assertReadText(rawBzip2File("bzip"));
+  	assertReadText(rawBzip2File("BZ2"));
+  	assertReadText(rawBzip2File("BZIP"));
+  }
+
+  @Test
+  public void testGetOutputStreamBzip2() throws Exception {
+  	assertReadText(autoOutFile("bz2"));
+  	assertReadText(autoOutFile("bzip"));
+  	assertReadText(autoOutFile("BZ2"));
+  	assertReadText(autoOutFile("BZIP"));
   }
   
-  private File textFile(String ext) throws Exception {
+  @Test
+  public void testGetOutputStreamGzip() throws Exception {
+  	assertReadText(autoOutFile("gz"));
+  	assertReadText(autoOutFile("gzip"));
+  	assertReadText(autoOutFile("GZ"));
+  	assertReadText(autoOutFile("GZIP"));
+  }
+
+  @Test
+  public void testGetOutputStreamPlain() throws Exception {
+  	assertReadText(autoOutFile("txt"));
+  	assertReadText(autoOutFile("text"));
+  	assertReadText(autoOutFile("TXT"));
+  	assertReadText(autoOutFile("TEXT"));
+  }
+  
+  private File rawTextFile(String ext) throws Exception {
     File f = new File(testDir,"testfile." +	ext);
     BufferedWriter w = new BufferedWriter(new FileWriter(f));
     w.write(TEXT);
@@ -71,38 +95,36 @@ public class ContentSourceTest extends BenchmarkTestCase {
     return f;
   }
   
-  private File gzipFile(String ext) throws Exception {
+  private File rawGzipFile(String ext) throws Exception {
     File f = new File(testDir,"testfile." +	ext);
-    OutputStream os = csFactory.createCompressorOutputStream(CompressorStreamFactory.GZIP, new FileOutputStream(f));
-    BufferedWriter w = new BufferedWriter(new OutputStreamWriter(os));
-    w.write(TEXT);
-    w.newLine();
-    w.close();
+    OutputStream os = new CompressorStreamFactory().createCompressorOutputStream(CompressorStreamFactory.GZIP, new FileOutputStream(f));
+    writeText(os);
     return f;
   }
 
-  private File bzip2File(String ext) throws Exception {
+  private File rawBzip2File(String ext) throws Exception {
   	File f = new File(testDir,"testfile." +	ext);
-  	OutputStream os = csFactory.createCompressorOutputStream(CompressorStreamFactory.BZIP2, new FileOutputStream(f));
-  	BufferedWriter w = new BufferedWriter(new OutputStreamWriter(os));
-  	w.write(TEXT);
-  	w.newLine();
-  	w.close();
+  	OutputStream os = new CompressorStreamFactory().createCompressorOutputStream(CompressorStreamFactory.BZIP2, new FileOutputStream(f));
+  	writeText(os);
   	return f;
   }
 
+  private File autoOutFile(String ext) throws Exception {
+  	File f = new File(testDir,"testfile." +	ext);
+  	OutputStream os = StreamUtils.outputStream(f);
+  	writeText(os);
+  	return f;
+  }
+
+	private void writeText(OutputStream os) throws IOException {
+		BufferedWriter w = new BufferedWriter(new OutputStreamWriter(os));
+  	w.write(TEXT);
+  	w.newLine();
+  	w.close();
+	}
+
   private void assertReadText(File f) throws Exception {
-    ContentSource src = new ContentSource() {
-      @Override
-      public void close() throws IOException { 
-      }
-      @Override
-      public DocData getNextDocData(DocData docData) throws NoMoreDataException,
-      IOException { 
-        return null;
-      }
-    };
-    InputStream ir = src.getInputStream(f);
+    InputStream ir = StreamUtils.inputStream(f);
     InputStreamReader in = new InputStreamReader(ir);
     BufferedReader r = new BufferedReader(in);
     String line = r.readLine();