LUCENE-2980: Benchmark's ContentSource made insensitive to letter case of file suffix.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1084544 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Doron Cohen 2011-03-23 11:38:54 +00:00
parent cc249256ca
commit d123b8a224
3 changed files with 170 additions and 10 deletions

View File

@ -2,6 +2,12 @@ Lucene Benchmark Contrib Change Log
The Benchmark contrib package contains code for benchmarking Lucene in a variety of ways. The Benchmark contrib package contains code for benchmarking Lucene in a variety of ways.
03/22/2011
LUCENE-2980: Benchmark's ContentSource no longer requires lower case file suffixes
for detecting file type (gzip/bzip2/text). As part of this fix, worked around an
issue with gzip/bzip input streams which were remaining open (See COMPRESS-127).
(Doron Cohen)
03/22/2011 03/22/2011
LUCENE-2978: Upgrade benchmark's commons-compress from 1.0 to 1.1 as LUCENE-2978: Upgrade benchmark's commons-compress from 1.0 to 1.1 as
the move of gzip decompression in LUCENE-1540 from Java's GZipInputStream the move of gzip decompression in LUCENE-1540 from Java's GZipInputStream

View File

@ -25,6 +25,7 @@ import java.io.InputStream;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.HashMap; import java.util.HashMap;
import java.util.Locale;
import java.util.Map; import java.util.Map;
import org.apache.commons.compress.compressors.CompressorException; import org.apache.commons.compress.compressors.CompressorException;
@ -128,23 +129,49 @@ public abstract class ContentSource {
int idx = fileName.lastIndexOf('.'); int idx = fileName.lastIndexOf('.');
String type = null; String type = null;
if (idx != -1) { if (idx != -1) {
type = extensionToType.get(fileName.substring(idx)); type = extensionToType.get(fileName.substring(idx).toLowerCase(Locale.ENGLISH));
} }
try { if (type!=null) { // bzip or gzip
if (type!=null) { // bzip or gzip try {
return csFactory.createCompressorInputStream(type, is); return closableCompressorInputStream(type,is);
} } catch (CompressorException e) {
} catch (CompressorException e) { IOException ioe = new IOException(e.getMessage());
IOException ioe = new IOException(e.getMessage()); ioe.initCause(e);
ioe.initCause(e); throw ioe;
throw ioe; }
} }
return is; return is;
} }
/** /**
* Wrap the compressor input stream so that calling close will also close
* the underlying stream - workaround for CommonsCompress bug (COMPRESS-127).
*/
private InputStream closableCompressorInputStream(String type, final InputStream is) throws CompressorException {
  // Let the factory build the decompressing stream for the requested type.
  final InputStream delegee = csFactory.createCompressorInputStream(type, is);
  if (!type.equals(CompressorStreamFactory.GZIP)) {
    return delegee; //compressor bug affects only gzip
  }
  // For gzip, hand back a wrapper that forwards every operation to the
  // decompressing stream and, on close, also closes the raw stream 'is'.
  return new InputStream() {
    @Override
    public int read() throws IOException {
      return delegee.read();
    }

    @Override
    public int read(byte[] b) throws IOException {
      return delegee.read(b);
    }

    @Override
    public int read(byte[] b, int off, int len) throws IOException {
      return delegee.read(b, off, len);
    }

    @Override
    public long skip(long n) throws IOException {
      return delegee.skip(n);
    }

    @Override
    public int available() throws IOException {
      return delegee.available();
    }

    @Override
    public boolean markSupported() {
      return delegee.markSupported();
    }

    @Override
    public synchronized void mark(int readlimit) {
      delegee.mark(readlimit);
    }

    @Override
    public synchronized void reset() throws IOException {
      delegee.reset();
    }

    @Override
    public void close() throws IOException {
      // The underlying stream must be closed even when closing the
      // decompressing stream fails, otherwise the file handle leaks.
      try {
        delegee.close();
      } finally {
        is.close();
      }
    }
  };
}
/**
* Returns true whether it's time to log a message (depending on verbose and * Returns true whether it's time to log a message (depending on verbose and
* the number of documents generated). * the number of documents generated).
*/ */

View File

@ -0,0 +1,127 @@
package org.apache.lucene.benchmark.byTask.feeds;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import org.apache.commons.compress.compressors.CompressorStreamFactory;
import org.apache.lucene.benchmark.BenchmarkTestCase;
import org.apache.lucene.util._TestUtil;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
/**
 * Tests that {@link ContentSource#getInputStream(File)} returns a stream from
 * which the original text can be read, for plain text, gzip and bzip2 files,
 * regardless of the letter case of the file name suffix.
 */
public class ContentSourceTest extends BenchmarkTestCase {

  /** The single line of text written to, and expected back from, every test file. */
  private static final String TEXT = "Some-Text...";

  /** Explicit charset so writing and reading do not depend on the platform default. */
  private static final String ENCODING = "UTF-8";

  private File testDir;
  private final CompressorStreamFactory csFactory = new CompressorStreamFactory();

  @Test
  public void testGetInputStreamPlainText() throws Exception {
    assertReadText(textFile("txt"));
    assertReadText(textFile("TXT"));
  }

  @Test
  public void testGetInputStreamGzip() throws Exception {
    assertReadText(gzipFile("gz"));
    assertReadText(gzipFile("gzip"));
    assertReadText(gzipFile("GZ"));
    assertReadText(gzipFile("GZIP"));
  }

  @Test
  public void testGetInputStreamBzip2() throws Exception {
    assertReadText(bzip2File("bz2"));
    assertReadText(bzip2File("bzip"));
    assertReadText(bzip2File("BZ2"));
    assertReadText(bzip2File("BZIP"));
  }

  /** Creates an uncompressed file with the given suffix containing {@link #TEXT}. */
  private File textFile(String ext) throws Exception {
    File f = new File(testDir, "testfile." + ext);
    writeText(new FileOutputStream(f));
    return f;
  }

  /** Creates a gzip-compressed file with the given suffix containing {@link #TEXT}. */
  private File gzipFile(String ext) throws Exception {
    return compressedFile(ext, CompressorStreamFactory.GZIP);
  }

  /** Creates a bzip2-compressed file with the given suffix containing {@link #TEXT}. */
  private File bzip2File(String ext) throws Exception {
    return compressedFile(ext, CompressorStreamFactory.BZIP2);
  }

  /**
   * Creates a file with the given suffix whose content is {@link #TEXT}
   * compressed with the given compressor type.
   *
   * @param ext file name suffix, without the leading dot
   * @param compressorType one of the {@link CompressorStreamFactory} type names
   */
  private File compressedFile(String ext, String compressorType) throws Exception {
    File f = new File(testDir, "testfile." + ext);
    OutputStream raw = new FileOutputStream(f);
    OutputStream os;
    try {
      os = csFactory.createCompressorOutputStream(compressorType, raw);
    } catch (Exception e) {
      // Do not leave the file open if the compressor could not be created,
      // an open handle may prevent the test directory from being removed.
      raw.close();
      throw e;
    }
    writeText(os);
    return f;
  }

  /** Writes {@link #TEXT} followed by a line separator to the stream, always closing it. */
  private void writeText(OutputStream os) throws IOException {
    BufferedWriter w = new BufferedWriter(new OutputStreamWriter(os, ENCODING));
    try {
      w.write(TEXT);
      w.newLine();
    } finally {
      w.close();
    }
  }

  /**
   * Opens the file through {@link ContentSource#getInputStream(File)} and
   * asserts that its first line equals {@link #TEXT}.
   */
  private void assertReadText(File f) throws Exception {
    // A minimal concrete ContentSource: only getInputStream() is exercised.
    ContentSource src = new ContentSource() {
      @Override
      public void close() throws IOException {
      }
      @Override
      public DocData getNextDocData(DocData docData) throws NoMoreDataException,
          IOException {
        return null;
      }
    };
    InputStream ir = src.getInputStream(f);
    BufferedReader r = new BufferedReader(new InputStreamReader(ir, ENCODING));
    try {
      String line = r.readLine();
      assertEquals("Wrong text found in "+f.getName(), TEXT, line);
    } finally {
      // Close even when the assertion fails so tearDown can delete the file.
      r.close();
    }
  }

  @Override
  @Before
  public void setUp() throws Exception {
    super.setUp();
    testDir = new File(getWorkDir(),"ContentSourceTest");
    _TestUtil.rmDir(testDir);
    assertTrue(testDir.mkdirs());
  }

  @Override
  @After
  public void tearDown() throws Exception {
    _TestUtil.rmDir(testDir);
    super.tearDown();
  }
}