diff --git a/modules/benchmark/CHANGES.txt b/modules/benchmark/CHANGES.txt index b33c96e9c1f..3934939c7c9 100644 --- a/modules/benchmark/CHANGES.txt +++ b/modules/benchmark/CHANGES.txt @@ -2,6 +2,12 @@ Lucene Benchmark Contrib Change Log The Benchmark contrib package contains code for benchmarking Lucene in a variety of ways. +03/22/2011 + LUCENE-2980: Benchmark's ContentSource no more requires lower case file suffixes + for detecting file type (gzip/bzip2/text). As part of this fix worked around an + issue with gzip/bzip input streams which were remaining open (See COMPRESS-127). + (Doron Cohen) + 03/22/2011 LUCENE-2978: Upgrade benchmark's commons-compress from 1.0 to 1.1 as the move of gzip decompression in LUCENE-1540 from Java's GZipInputStream diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java index da160d7a4d5..4af7dde7623 100644 --- a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java +++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java @@ -25,6 +25,7 @@ import java.io.InputStream; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; +import java.util.Locale; import java.util.Map; import org.apache.commons.compress.compressors.CompressorException; @@ -128,23 +129,49 @@ public abstract class ContentSource { int idx = fileName.lastIndexOf('.'); String type = null; if (idx != -1) { - type = extensionToType.get(fileName.substring(idx)); + type = extensionToType.get(fileName.substring(idx).toLowerCase(Locale.ENGLISH)); } - try { - if (type!=null) { // bzip or gzip - return csFactory.createCompressorInputStream(type, is); - } - } catch (CompressorException e) { - IOException ioe = new IOException(e.getMessage()); - ioe.initCause(e); - throw ioe; - } + if (type!=null) { // bzip or gzip + try { + return closableCompressorInputStream(type,is); + } catch (CompressorException e) { + IOException ioe = new IOException(e.getMessage()); + ioe.initCause(e); + throw ioe; + } + } return is; } /** + * Wrap the compressor input stream so that calling close will also close + * the underlying stream - workaround for CommonsCompress bug (COMPRESS-127). + */ + private InputStream closableCompressorInputStream(String type, final InputStream is) throws CompressorException { + final InputStream delegee = csFactory.createCompressorInputStream(type, is); + if (!type.equals(CompressorStreamFactory.GZIP)) { + return delegee; //compressor bug affects only gzip + } + return new InputStream() { + @Override public int read() throws IOException { return delegee.read(); } + @Override public int read(byte[] b) throws IOException { return delegee.read(b); } + @Override public int available() throws IOException { return delegee.available(); } + @Override public synchronized void mark(int readlimit) { delegee.mark(readlimit); } + @Override public boolean markSupported() { return delegee.markSupported(); } + @Override public int read(byte[] b, int off, int len) throws IOException { return delegee.read(b, off, len); } + @Override public synchronized void reset() throws IOException { delegee.reset(); } + @Override public long skip(long n) throws IOException { return delegee.skip(n); } + @Override + public void close() throws IOException { + delegee.close(); + is.close(); + } + }; + } + + /** * Returns true whether it's time to log a message (depending on verbose and * the number of documents generated). */ diff --git a/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/ContentSourceTest.java b/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/ContentSourceTest.java new file mode 100644 index 00000000000..6ba647a1721 --- /dev/null +++ b/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/ContentSourceTest.java @@ -0,0 +1,127 @@ +package org.apache.lucene.benchmark.byTask.feeds; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileOutputStream; +import java.io.FileWriter; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.OutputStream; +import java.io.OutputStreamWriter; + +import org.apache.commons.compress.compressors.CompressorStreamFactory; +import org.apache.lucene.benchmark.BenchmarkTestCase; +import org.apache.lucene.util._TestUtil; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +public class ContentSourceTest extends BenchmarkTestCase { + private static final String TEXT = "Some-Text..."; + private File testDir; + private CompressorStreamFactory csFactory = new CompressorStreamFactory(); + + @Test + public void testGetInputStreamPlainText() throws Exception { + assertReadText(textFile("txt")); + assertReadText(textFile("TXT")); + } + + @Test + public void testGetInputStreamGzip() throws Exception { + assertReadText(gzipFile("gz")); + assertReadText(gzipFile("gzip")); + assertReadText(gzipFile("GZ")); + assertReadText(gzipFile("GZIP")); + } + + @Test + public void testGetInputStreamBzip2() throws Exception { + assertReadText(bzip2File("bz2")); + assertReadText(bzip2File("bzip")); + assertReadText(bzip2File("BZ2")); + assertReadText(bzip2File("BZIP")); + } + + private File textFile(String ext) throws Exception { + File f = new File(testDir,"testfile." + ext); + BufferedWriter w = new BufferedWriter(new FileWriter(f)); + w.write(TEXT); + w.newLine(); + w.close(); + return f; + } + + private File gzipFile(String ext) throws Exception { + File f = new File(testDir,"testfile." + ext); + OutputStream os = csFactory.createCompressorOutputStream(CompressorStreamFactory.GZIP, new FileOutputStream(f)); + BufferedWriter w = new BufferedWriter(new OutputStreamWriter(os)); + w.write(TEXT); + w.newLine(); + w.close(); + return f; + } + + private File bzip2File(String ext) throws Exception { + File f = new File(testDir,"testfile." + ext); + OutputStream os = csFactory.createCompressorOutputStream(CompressorStreamFactory.BZIP2, new FileOutputStream(f)); + BufferedWriter w = new BufferedWriter(new OutputStreamWriter(os)); + w.write(TEXT); + w.newLine(); + w.close(); + return f; + } + + private void assertReadText(File f) throws Exception { + ContentSource src = new ContentSource() { + @Override + public void close() throws IOException { + } + @Override + public DocData getNextDocData(DocData docData) throws NoMoreDataException, + IOException { + return null; + } + }; + InputStream ir = src.getInputStream(f); + InputStreamReader in = new InputStreamReader(ir); + BufferedReader r = new BufferedReader(in); + String line = r.readLine(); + assertEquals("Wrong text found in "+f.getName(), TEXT, line); + r.close(); + } + + @Before + public void setUp() throws Exception { + super.setUp(); + testDir = new File(getWorkDir(),"ContentSourceTest"); + _TestUtil.rmDir(testDir); + assertTrue(testDir.mkdirs()); + } + + @After + public void tearDown() throws Exception { + _TestUtil.rmDir(testDir); + super.tearDown(); + } + +}