LUCENE-981 and LUCENE-980: Added new AnalyzerTask and fixed issue with long strings in Format.java

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@567262 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Grant Ingersoll 2007-08-18 12:24:21 +00:00
parent c1a76d9dbe
commit c67fd79a83
5 changed files with 180 additions and 7 deletions

View File

@ -4,6 +4,9 @@ The Benchmark contrib package contains code for benchmarking Lucene in a variety
$Id:$
8/15/07
LUCENE-
8/9/07
LUCENE-971: Change enwiki tasks to a doc maker (extending
LineDocMaker) that directly processes the Wikipedia XML and produces

View File

@ -0,0 +1,80 @@
#/**
# * Licensed to the Apache Software Foundation (ASF) under one or more
# * contributor license agreements. See the NOTICE file distributed with
# * this work for additional information regarding copyright ownership.
# * The ASF licenses this file to You under the Apache License, Version 2.0
# * (the "License"); you may not use this file except in compliance with
# * the License. You may obtain a copy of the License at
# *
# * http://www.apache.org/licenses/LICENSE-2.0
# *
# * Unless required by applicable law or agreed to in writing, software
# * distributed under the License is distributed on an "AS IS" BASIS,
# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# * See the License for the specific language governing permissions and
# * limitations under the License.
# */
# -------------------------------------------------------------------------------------
# multi val params are iterated by NewRound's, added to reports, start with column name.
merge.factor=mrg:10
#:100:10:100
max.buffered=buf:10
#:10:100:100
compound=true
analyzer=org.apache.lucene.analysis.standard.StandardAnalyzer
directory=FSDirectory
#directory=RamDirectory
doc.stored=true
doc.tokenized=true
doc.term.vector=false
doc.add.log.step=500
docs.dir=reuters-out
#docs.dir=reuters-111
#doc.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleDocMaker
doc.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersDocMaker
#query.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleQueryMaker
query.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker
# task at this depth or less would print when they start
task.max.depth.log=2
log.queries=true
# -------------------------------------------------------------------------------------
{ "Rounds"
ResetSystemErase
#If the analyzer is in o.a.l.analysis, then just the classname can be used, otherwise the FQN must be used
#Standard Analyzer can be shortened to standard.StandardAnalyzer
{"NewAnalyzer" NewAnalyzer(WhitespaceAnalyzer, SimpleAnalyzer, StopAnalyzer, standard.StandardAnalyzer) >
{ "Populate"
CreateIndex
{ "MAddDocs" AddDoc > : 2000
Optimize
CloseIndex
}
OpenReader
{ "SearchSameRdr" Search > : 5000
CloseReader
{ "WarmNewRdr" Warm > : 50
{ "SrchNewRdr" Search > : 500
{ "SrchTrvNewRdr" SearchTrav(1000) > : 300
{ "SrchTrvRetNewRdr" SearchTravRet(2000) > : 100
NewRound
} : 4
RepSumByName
RepSumByPrefRound MAddDocs

View File

@ -17,10 +17,6 @@ package org.apache.lucene.benchmark.byTask;
* limitations under the License.
*/
import java.io.File;
import java.util.HashMap;
import java.util.Iterator;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
import org.apache.lucene.benchmark.byTask.feeds.HTMLParser;
@ -28,13 +24,17 @@ import org.apache.lucene.benchmark.byTask.feeds.QueryMaker;
import org.apache.lucene.benchmark.byTask.stats.Points;
import org.apache.lucene.benchmark.byTask.tasks.ReadTask;
import org.apache.lucene.benchmark.byTask.tasks.SearchTask;
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.benchmark.byTask.utils.FileUtils;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.benchmark.byTask.utils.FileUtils;
import java.io.File;
import java.util.HashMap;
import java.util.Iterator;
/**
@ -207,6 +207,11 @@ public class PerfRunData {
return analyzer;
}
public void setAnalyzer(Analyzer analyzer) {
this.analyzer = analyzer;
}
/**
* @return Returns the docMaker.
*/

View File

@ -0,0 +1,84 @@
package org.apache.lucene.benchmark.byTask.tasks;
/**
* Copyright 2005 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.benchmark.byTask.PerfRunData;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;
/**
* Create a new {@link org.apache.lucene.analysis.Analyzer} and set it it in the getRunData() for use by all future tasks.
*
*/
public class NewAnalyzerTask extends PerfTask {
private List/*<String>*/ analyzerClassNames;
private int current;
public NewAnalyzerTask(PerfRunData runData) {
super(runData);
analyzerClassNames = new ArrayList();
}
public int doLogic() throws IOException {
String className = null;
try {
if (current >= analyzerClassNames.size())
{
current = 0;
}
className = (String) analyzerClassNames.get(current++);
if (className == null || className.equals(""))
{
className = "org.apache.lucene.analysis.standard.StandardAnalyzer";
}
if (className.indexOf(".") == -1 || className.startsWith("standard."))//there is no package name, assume o.a.l.analysis
{
className = "org.apache.lucene.analysis." + className;
}
getRunData().setAnalyzer((Analyzer) Class.forName(className).newInstance());
System.out.println("Changed Analyzer to: " + className);
} catch (Exception e) {
throw new RuntimeException("Error creating Analyzer: " + className, e);
}
return 1;
}
/**
* Set the params (analyzerClassName only), Comma-separate list of Analyzer class names. If the Analyzer lives in
* org.apache.lucene.analysis, the name can be shortened by dropping the o.a.l.a part of the Fully Qualified Class Name.
* <p/>
* Example Declaration: {"NewAnalyzer" NewAnalyzer(WhitespaceAnalyzer, SimpleAnalyzer, StopAnalyzer, standard.StandardAnalyzer) >
* @param params analyzerClassName, or empty for the StandardAnalyzer
*/
public void setParams(String params) {
super.setParams(params);
for (StringTokenizer tokenizer = new StringTokenizer(params, ","); tokenizer.hasMoreTokens();) {
String s = tokenizer.nextToken();
analyzerClassNames.add(s.trim());
}
}
/* (non-Javadoc)
* @see org.apache.lucene.benchmark.byTask.tasks.PerfTask#supportsParams()
*/
public boolean supportsParams() {
return true;
}
}

View File

@ -82,7 +82,8 @@ public class Format {
* @return formatted string.
*/
public static String format(String s, String col) {
return (s + padd).substring(0, col.length());
String s1 = (s + padd);
return s1.substring(0, Math.min(col.length(), s1.length()));
}
/**