LUCENE-979: remove deprecated non-Task benchmark classes

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@783794 13f79535-47bb-0310-9956-ffa450edef68
Michael McCandless 2009-06-11 14:26:32 +00:00
parent 9dc231fb74
commit 159573f7eb
7 changed files with 0 additions and 865 deletions
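All of the classes removed below had been deprecated in favor of the byTask framework (org.apache.lucene.benchmark.byTask). For reference, a minimal sketch of the replacement entry point, assuming the benchmark contrib's algorithm files are available; the conf/micro-standard.alg path is illustrative only:

import org.apache.lucene.benchmark.byTask.Benchmark;

public class RunByTaskBenchmark
{
    public static void main(String[] args) throws Exception
    {
        // Benchmark.main reads an .alg algorithm file describing the index
        // and search tasks to run; the path below is only an example.
        Benchmark.main(new String[] { "conf/micro-standard.alg" });
    }
}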

View File: org/apache/lucene/benchmark/AbstractBenchmarker.java

@@ -1,61 +0,0 @@
package org.apache.lucene.benchmark;
import java.io.File;
import java.io.IOException;
/**
* Copyright 2005 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*
* @deprecated Use the Task based benchmarker
**/
public abstract class AbstractBenchmarker implements Benchmarker
{
/**
* Delete files and directories, even if non-empty.
*
* @param dir file or directory
* @return true on success, false if none or only some of the files could be deleted
* @throws java.io.IOException
*/
public static boolean fullyDelete(File dir) throws IOException
{
if (dir == null || !dir.exists()) return false;
File contents[] = dir.listFiles();
if (contents != null)
{
for (int i = 0; i < contents.length; i++)
{
if (contents[i].isFile())
{
if (!contents[i].delete())
{
return false;
}
}
else
{
if (!fullyDelete(contents[i]))
{
return false;
}
}
}
}
return dir.delete();
}
}

View File: org/apache/lucene/benchmark/BenchmarkOptions.java

@@ -1,30 +0,0 @@
package org.apache.lucene.benchmark;
/**
* Copyright 2005 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Marker interface for benchmark options. Implementations should define their own set of options, which a
* {@link Benchmarker} implementation can cast back to its concrete type.
* <p/>
* As benchmarks are added, a common set of options may emerge.
*
*
* @deprecated Use the task based approach instead
**/
public interface BenchmarkOptions
{
}

View File: org/apache/lucene/benchmark/Benchmarker.java

@@ -1,39 +0,0 @@
package org.apache.lucene.benchmark;
/**
* Copyright 2005 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.benchmark.stats.TestData;
import java.io.File;
/**
*
* @deprecated Use the Task based benchmarker
**/
public interface Benchmarker
{
/**
* Benchmark according to the implementation, using workingDir as the place to store temporary data.
*
* @param workingDir The {@link java.io.File} directory in which to store temporary data while running the benchmark
* @param options Any {@link BenchmarkOptions} needed for this benchmark; implementations typically cast this to their own concrete options type
* @return The {@link org.apache.lucene.benchmark.stats.TestData} used to run the benchmark.
*/
TestData[] benchmark(File workingDir, BenchmarkOptions options) throws Exception;
}
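For context, an implementation of this interface downcasts the supplied BenchmarkOptions to its own concrete options type (as StandardBenchmarker below does with StandardOptions) and returns the collected statistics. A minimal hypothetical sketch, not part of the removed code:

package org.apache.lucene.benchmark;

import java.io.File;

import org.apache.lucene.benchmark.stats.TestData;

public class NoOpBenchmarker extends AbstractBenchmarker
{
    public TestData[] benchmark(File workingDir, BenchmarkOptions options) throws Exception
    {
        // A real implementation would cast options to its concrete type,
        // build an index under workingDir, run queries and record timings;
        // this sketch only illustrates the contract.
        workingDir.mkdirs();
        return new TestData[0];
    }
}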

View File: org/apache/lucene/benchmark/Driver.java

@@ -1,145 +0,0 @@
package org.apache.lucene.benchmark;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import org.apache.commons.digester.Digester;
import org.apache.lucene.benchmark.standard.StandardBenchmarker;
import org.apache.lucene.benchmark.stats.TestData;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
/**
* Copyright 2005 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Sets up the benchmark driver.
*
**/
public class Driver
{
private File workingDir;
private Benchmarker benchmarker;
private BenchmarkOptions options;
public Driver()
{
}
public Driver(Benchmarker benchmarker, BenchmarkOptions options)
{
this.benchmarker = benchmarker;
this.options = options;
}
/**
* Creates a Driver, configuring the benchmarker and options from the given XML via Digester.
*
* @param workingDir the directory in which to store temporary benchmark data
* @param inputSource the XML configuration to parse
*/
public Driver(File workingDir, InputSource inputSource) throws IOException, SAXException
{
Digester digester = new Digester();
digester.setValidating(false);
digester.addObjectCreate("benchmark/benchmarker", "class", StandardBenchmarker.class);
digester.addSetProperties("benchmark/benchmarker");
digester.addSetNext("benchmark/benchmarker", "setBenchmarker");
digester.addObjectCreate("benchmark/options", "class", BenchmarkOptions.class);
digester.addSetProperties("benchmark/options");
digester.addSetNext("benchmark/options", "setOptions");
digester.push(this);
digester.parse(inputSource);
this.workingDir = workingDir;
}
private void run() throws Exception
{
TestData [] data = benchmarker.benchmark(workingDir, options);
//Print out summary:
/*System.out.println("Test Data:");
for (int i = 0; i < data.length; i++)
{
TestData testData = data[i];
System.out.println("---------------");
System.out.println(testData.showRunData(testData.getId()));
System.out.println("---------------");
}*/
}
public Benchmarker getBenchmarker()
{
return benchmarker;
}
public void setBenchmarker(Benchmarker benchmarker)
{
this.benchmarker = benchmarker;
}
public BenchmarkOptions getOptions()
{
return options;
}
public void setOptions(BenchmarkOptions options)
{
this.options = options;
}
public File getWorkingDir()
{
return workingDir;
}
public void setWorkingDir(File workingDir)
{
this.workingDir = workingDir;
}
public static void main(String[] args)
{
if (args.length != 2)
{
printHelp(args);
System.exit(0);
}
File workingDir = new File(args[0]);
File configFile = new File(args[1]);
if (configFile.exists())
{
//Setup
try
{
Driver driver = new Driver(workingDir, new InputSource(new FileReader(configFile)));
driver.run();
}
catch (Exception e)
{
e.printStackTrace(System.err);
}
}
}
private static void printHelp(String[] args)
{
System.out.println("Usage: java -cp [...] " + Driver.class.getName() + "<working dir> <config-file>");
}
}
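The Digester rules in the constructor above imply an XML configuration with a benchmark root element, a benchmarker child and an options child, where attributes on the options element map onto bean properties of the options class. A hedged sketch that writes such a file and hands it to Driver.main; the working directory, file name and option values are illustrative:

package org.apache.lucene.benchmark;

import java.io.File;
import java.io.FileWriter;

public class DriverConfigExample
{
    public static void main(String[] args) throws Exception
    {
        // Shape implied by the Digester rules; runCount and logStep are set
        // through the StandardOptions bean setters.
        String config =
            "<benchmark>\n" +
            "  <benchmarker class='org.apache.lucene.benchmark.standard.StandardBenchmarker'/>\n" +
            "  <options class='org.apache.lucene.benchmark.standard.StandardOptions' runCount='3' logStep='500'/>\n" +
            "</benchmark>\n";
        File configFile = new File("benchmark-config.xml");
        FileWriter writer = new FileWriter(configFile);
        writer.write(config);
        writer.close();
        Driver.main(new String[] { "work", configFile.getPath() });
    }
}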

View File: org/apache/lucene/benchmark/standard/ReutersQueries.java

@@ -1,59 +0,0 @@
package org.apache.lucene.benchmark.standard;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.spans.SpanFirstQuery;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
/**
* Copyright 2005 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @deprecated Use Task based benchmarker
*
**/
public class ReutersQueries
{
public static String [] STANDARD_QUERIES = {
//Start with some short queries
"Salomon", "Comex", "night trading", "Japan Sony",
//Try some Phrase Queries
"\"Sony Japan\"", "\"food needs\"~3",
"\"World Bank\"^2 AND Nigeria", "\"World Bank\" -Nigeria",
"\"Ford Credit\"~5",
//Try some longer queries
"airline Europe Canada destination",
"Long term pressure by trade " +
"ministers is necessary if the current Uruguay round of talks on " +
"the General Agreement on Trade and Tariffs (GATT) is to " +
"succeed"
};
public static Query[] getPrebuiltQueries(String field)
{
//be wary of unanalyzed text
return new Query[]{
new SpanFirstQuery(new SpanTermQuery(new Term(field, "ford")), 5),
new SpanNearQuery(new SpanQuery[]{new SpanTermQuery(new Term(field, "night")), new SpanTermQuery(new Term(field, "trading"))}, 4, false),
new SpanNearQuery(new SpanQuery[]{new SpanFirstQuery(new SpanTermQuery(new Term(field, "ford")), 10), new SpanTermQuery(new Term(field, "credit"))}, 10, false),
new WildcardQuery(new Term(field, "fo*")),
};
}
}

View File: org/apache/lucene/benchmark/standard/StandardBenchmarker.java

@@ -1,453 +0,0 @@
package org.apache.lucene.benchmark.standard;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.benchmark.AbstractBenchmarker;
import org.apache.lucene.benchmark.BenchmarkOptions;
import org.apache.lucene.benchmark.Benchmarker;
import org.apache.lucene.benchmark.stats.QueryData;
import org.apache.lucene.benchmark.stats.TestData;
import org.apache.lucene.benchmark.stats.TestRunData;
import org.apache.lucene.benchmark.stats.TimeData;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.FSDirectory;
import java.io.*;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.*;
/**
* Copyright 2005 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Reads in the Reuters collection (downloaded from http://www.daviddlewis.com/resources/testcollections/reuters21578/reuters21578.tar.gz
* and extracted into workingDir/reuters-out) and indexes it using the {@link org.apache.lucene.analysis.standard.StandardAnalyzer}.
*<p/>
* Runs the standard set of documents through an IndexWriter and then runs a standard set of queries against the resulting index.
*
* @see org.apache.lucene.benchmark.standard.StandardBenchmarker#benchmark(java.io.File, org.apache.lucene.benchmark.BenchmarkOptions)
*
* @deprecated use the byTask code instead. See http://lucene.zones.apache.org:8080/hudson/job/Lucene-Nightly/javadoc/org/apache/lucene/benchmark/byTask/package-summary.html .
**/
public class StandardBenchmarker extends AbstractBenchmarker implements Benchmarker
{
public static final String SOURCE_DIR = "reuters-out";
public static final String INDEX_DIR = "index";
//30-MAR-1987 14:22:36.87
private static DateFormat format = new SimpleDateFormat("dd-MMM-yyyy kk:mm:ss.SSS",Locale.US);
//DateFormat.getDateTimeInstance(DateFormat.MEDIUM, DateFormat.SHORT);
static{
format.setLenient(true);
}
public StandardBenchmarker()
{
}
public TestData [] benchmark(File workingDir, BenchmarkOptions opts) throws Exception
{
StandardOptions options = (StandardOptions) opts;
workingDir.mkdirs();
File sourceDir = getSourceDirectory(workingDir);
sourceDir.mkdirs();
File indexDir = new File(workingDir, INDEX_DIR);
indexDir.mkdirs();
Analyzer a = new StandardAnalyzer();
List queryList = new ArrayList(20);
queryList.addAll(Arrays.asList(ReutersQueries.STANDARD_QUERIES));
queryList.addAll(Arrays.asList(ReutersQueries.getPrebuiltQueries("body")));
Query[] qs = createQueries(queryList, a);
// Here you can limit the set of query benchmarks
QueryData[] qds = QueryData.getAll(qs);
// Here you can narrow down the set of test parameters
TestData[] params = TestData.getTestDataMinMaxMergeAndMaxBuffered(new File[]{sourceDir/*, jumboDir*/}, new Analyzer[]{a});//TestData.getAll(new File[]{sourceDir, jumboDir}, new Analyzer[]{a});
System.out.println("Testing " + params.length + " different permutations.");
for (int i = 0; i < params.length; i++)
{
try
{
reset(indexDir);
params[i].setDirectory(FSDirectory.open(indexDir));
params[i].setQueries(qds);
System.out.println(params[i]);
runBenchmark(params[i], options);
// Here you can collect and output the runData for further processing.
System.out.println(params[i].showRunData(params[i].getId()));
//bench.runSearchBenchmark(queries, dir);
params[i].getDirectory().close();
System.runFinalization();
System.gc();
}
catch (Exception e)
{
e.printStackTrace();
System.out.println("EXCEPTION: " + e.getMessage());
//break;
}
}
return params;
}
protected File getSourceDirectory(File workingDir)
{
return new File(workingDir, SOURCE_DIR);
}
/**
* Run benchmark using supplied parameters.
*
* @param params benchmark parameters
* @throws Exception
*/
protected void runBenchmark(TestData params, StandardOptions options) throws Exception
{
System.out.println("Start Time: " + new Date());
int runCount = options.getRunCount();
for (int i = 0; i < runCount; i++)
{
TestRunData trd = new TestRunData();
trd.startRun();
trd.setId(String.valueOf(i));
IndexWriter iw = new IndexWriter(params.getDirectory(), params.getAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
iw.setMergeFactor(params.getMergeFactor());
iw.setMaxBufferedDocs(params.getMaxBufferedDocs());
iw.setUseCompoundFile(params.isCompound());
makeIndex(trd, params.getSource(), iw, true, true, false, options);
if (params.isOptimize())
{
TimeData td = new TimeData("optimize");
trd.addData(td);
td.start();
iw.optimize();
td.stop();
trd.addData(td);
}
iw.close();
QueryData[] queries = params.getQueries();
if (queries != null)
{
IndexReader ir = null;
IndexSearcher searcher = null;
for (int k = 0; k < queries.length; k++)
{
QueryData qd = queries[k];
if (ir != null && qd.reopen)
{
searcher.close();
ir.close();
ir = null;
searcher = null;
}
if (ir == null)
{
ir = IndexReader.open(params.getDirectory());
searcher = new IndexSearcher(ir);
}
Document doc = null;
if (qd.warmup)
{
TimeData td = new TimeData(qd.id + "-warm");
for (int m = 0; m < ir.maxDoc(); m++)
{
td.start();
if (ir.isDeleted(m))
{
td.stop();
continue;
}
doc = ir.document(m);
td.stop();
}
trd.addData(td);
}
TimeData td = new TimeData(qd.id + "-srch");
td.start();
Hits h = searcher.search(qd.q);
//System.out.println("Hits Size: " + h.length() + " Query: " + qd.q);
td.stop();
trd.addData(td);
td = new TimeData(qd.id + "-trav");
if (h != null && h.length() > 0)
{
for (int m = 0; m < h.length(); m++)
{
td.start();
int id = h.id(m);
if (qd.retrieve)
{
doc = ir.document(id);
}
td.stop();
}
}
trd.addData(td);
}
try
{
if (searcher != null)
{
searcher.close();
}
}
catch (Exception e)
{
// ignore exceptions while closing the searcher
}
try
{
if (ir != null)
{
ir.close();
}
}
catch (Exception e)
{
// ignore exceptions while closing the reader
}
}
trd.endRun();
params.getRunData().add(trd);
//System.out.println(params[i].showRunData(params[i].getId()));
//params.showRunData(params.getId());
}
System.out.println("End Time: " + new Date());
}
/**
* Parse one extracted Reuters article and index its date, title and body.
*
* @param in input file
* @param tags optional tag values to add to the document
* @param stored whether field values are stored
* @param tokenized whether field values are analyzed
* @param tfv whether term vectors are stored
* @return Lucene document
*/
protected Document makeDocument(File in, String[] tags, boolean stored, boolean tokenized, boolean tfv)
throws Exception
{
Document doc = new Document();
// tag this document
if (tags != null)
{
for (int i = 0; i < tags.length; i++)
{
doc.add(new Field("tag" + i, tags[i], stored == true ? Field.Store.YES : Field.Store.NO,
tokenized == true ? Field.Index.ANALYZED : Field.Index.NOT_ANALYZED, tfv == true ? Field.TermVector.YES : Field.TermVector.NO));
}
}
doc.add(new Field("file", in.getCanonicalPath(), stored == true ? Field.Store.YES : Field.Store.NO,
tokenized == true ? Field.Index.ANALYZED : Field.Index.NOT_ANALYZED, tfv == true ? Field.TermVector.YES : Field.TermVector.NO));
BufferedReader reader = new BufferedReader(new FileReader(in));
String line = null;
//First line is the date, 3rd is the title, rest is body
String dateStr = reader.readLine();
reader.readLine();//skip an empty line
String title = reader.readLine();
reader.readLine();//skip an empty line
StringBuffer body = new StringBuffer(1024);
while ((line = reader.readLine()) != null)
{
body.append(line).append(' ');
}
reader.close();
Date date = format.parse(dateStr.trim());
doc.add(new Field("date", DateTools.dateToString(date, DateTools.Resolution.SECOND), Field.Store.YES, Field.Index.NOT_ANALYZED));
if (title != null)
{
doc.add(new Field("title", title, stored == true ? Field.Store.YES : Field.Store.NO,
tokenized == true ? Field.Index.ANALYZED : Field.Index.NOT_ANALYZED, tfv == true ? Field.TermVector.YES : Field.TermVector.NO));
}
if (body.length() > 0)
{
doc.add(new Field("body", body.toString(), stored == true ? Field.Store.YES : Field.Store.NO,
tokenized == true ? Field.Index.ANALYZED : Field.Index.NOT_ANALYZED, tfv == true ? Field.TermVector.YES : Field.TermVector.NO));
}
return doc;
}
/**
* Make index, and collect time data.
*
* @param trd run data to populate
* @param srcDir directory with source files
* @param iw index writer, already open
* @param stored store values of fields
* @param tokenized tokenize fields
* @param tfv store term vectors
* @param options benchmark options controlling scale up, log step and the maximum number of documents to index
* @throws Exception
*/
protected void makeIndex(TestRunData trd, File srcDir, IndexWriter iw, boolean stored, boolean tokenized,
boolean tfv, StandardOptions options) throws Exception
{
//File[] groups = srcDir.listFiles();
List files = new ArrayList();
getAllFiles(srcDir, null, files);
Document doc = null;
long cnt = 0L;
TimeData td = new TimeData();
td.name = "addDocument";
int scaleUp = options.getScaleUp();
int logStep = options.getLogStep();
int max = Math.min(files.size(), options.getMaximumDocumentsToIndex());
for (int s = 0; s < scaleUp; s++)
{
String[] tags = new String[]{srcDir.getName() + "/" + s};
int i = 0;
for (Iterator iterator = files.iterator(); iterator.hasNext() && i < max; i++)
{
File file = (File) iterator.next();
doc = makeDocument(file, tags, stored, tokenized, tfv);
td.start();
iw.addDocument(doc);
td.stop();
cnt++;
if (cnt % logStep == 0)
{
System.err.println(" - processed " + cnt + ", run id=" + trd.getId());
trd.addData(td);
td.reset();
}
}
}
trd.addData(td);
}
public static void getAllFiles(File srcDir, FileFilter filter, List allFiles)
{
File [] files = srcDir.listFiles(filter);
for (int i = 0; i < files.length; i++)
{
File file = files[i];
if (file.isDirectory())
{
getAllFiles(file, filter, allFiles);
}
else
{
allFiles.add(file);
}
}
}
/**
* Parse the strings containing Lucene queries.
*
* @param qs array of strings containing query expressions
* @param a analyzer to use when parsing queries
* @return array of Lucene queries
*/
public static Query[] createQueries(List qs, Analyzer a)
{
QueryParser qp = new QueryParser("body", a);
List queries = new ArrayList();
for (int i = 0; i < qs.size(); i++)
{
try
{
Object query = qs.get(i);
Query q = null;
if (query instanceof String)
{
q = qp.parse((String) query);
}
else if (query instanceof Query)
{
q = (Query) query;
}
else
{
System.err.println("Unsupported Query Type: " + query);
}
if (q != null)
{
queries.add(q);
}
}
catch (Exception e)
{
e.printStackTrace();
}
}
return (Query[]) queries.toArray(new Query[0]);
}
/**
* Remove existing index.
*
* @param indexDir index directory to delete and recreate
* @throws Exception
*/
protected void reset(File indexDir) throws Exception
{
if (indexDir.exists())
{
fullyDelete(indexDir);
}
indexDir.mkdirs();
}
/**
* Save a stream to a file.
*
* @param is input stream
* @param out output file
* @param closeInput if true, close the input stream when done.
* @throws Exception
*/
protected void saveStream(InputStream is, File out, boolean closeInput) throws Exception
{
byte[] buf = new byte[4096];
FileOutputStream fos = new FileOutputStream(out);
int len = 0;
long total = 0L;
long time = System.currentTimeMillis();
long delta = time;
while ((len = is.read(buf)) > 0)
{
fos.write(buf, 0, len);
total += len;
time = System.currentTimeMillis();
if (time - delta > 5000)
{
System.err.println(" - copied " + total / 1024 + " kB...");
delta = time;
}
}
fos.flush();
fos.close();
if (closeInput)
{
is.close();
}
}
}
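makeDocument above expects each extracted Reuters article as a plain-text file: a date line in dd-MMM-yyyy kk:mm:ss.SSS form, a blank line, the title, another blank line, then the body. A small sketch that writes one such file and parses it back; the file name and article text are made up:

package org.apache.lucene.benchmark.standard;

import java.io.File;
import java.io.FileWriter;

import org.apache.lucene.document.Document;

public class ReutersArticleExample
{
    public static void main(String[] args) throws Exception
    {
        // One article per file: date, blank line, title, blank line, body.
        File article = new File("reuters-out/0001.txt");
        article.getParentFile().mkdirs();
        FileWriter writer = new FileWriter(article);
        writer.write("26-FEB-1987 15:01:01.79\n\n");
        writer.write("BAHIA COCOA REVIEW\n\n");
        writer.write("Showers continued throughout the week in the Bahia cocoa zone.\n");
        writer.close();

        // makeDocument is protected but accessible here (same package).
        Document doc = new StandardBenchmarker().makeDocument(article, null, true, true, false);
        System.out.println(doc.get("title") + " / " + doc.get("date"));
    }
}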

View File: org/apache/lucene/benchmark/standard/StandardOptions.java

@@ -1,78 +0,0 @@
package org.apache.lucene.benchmark.standard;
import org.apache.lucene.benchmark.BenchmarkOptions;
import org.apache.lucene.benchmark.Constants;
/**
* Copyright 2005 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*
* @deprecated Use the Task-based benchmark framework instead
**/
public class StandardOptions implements BenchmarkOptions
{
private int runCount = Constants.DEFAULT_RUN_COUNT;
private int logStep = Constants.DEFAULT_LOG_STEP;
private int scaleUp = Constants.DEFAULT_SCALE_UP;
private int maximumDocumentsToIndex = Constants.DEFAULT_MAXIMUM_DOCUMENTS;
public int getMaximumDocumentsToIndex()
{
return maximumDocumentsToIndex;
}
public void setMaximumDocumentsToIndex(int maximumDocumentsToIndex)
{
this.maximumDocumentsToIndex = maximumDocumentsToIndex;
}
/**
* How often to print out log messages when in benchmark loops
*/
public int getLogStep()
{
return logStep;
}
public void setLogStep(int logStep)
{
this.logStep = logStep;
}
/**
* The number of times to run the benchmark
*/
public int getRunCount()
{
return runCount;
}
public void setRunCount(int runCount)
{
this.runCount = runCount;
}
public int getScaleUp()
{
return scaleUp;
}
public void setScaleUp(int scaleUp)
{
this.scaleUp = scaleUp;
}
}
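Taken together, the removed classes could also be driven directly from code rather than through the Driver XML configuration. A minimal sketch, assuming the extracted Reuters articles already sit under work/reuters-out; paths and option values are illustrative:

package org.apache.lucene.benchmark.standard;

import java.io.File;

import org.apache.lucene.benchmark.stats.TestData;

public class StandardBenchmarkExample
{
    public static void main(String[] args) throws Exception
    {
        StandardOptions options = new StandardOptions();
        options.setRunCount(2);
        options.setLogStep(500);

        // Indexes work/reuters-out into work/index and runs the standard
        // Reuters queries against the resulting index.
        TestData[] runs = new StandardBenchmarker().benchmark(new File("work"), options);
        for (int i = 0; i < runs.length; i++)
        {
            System.out.println(runs[i].showRunData(runs[i].getId()));
        }
    }
}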