mirror of https://github.com/apache/lucene.git
LUCENE-981 and LUCENE-980: Added new AnalyzerTask and fixed issue with long strings in Format.java
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@567262 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
c1a76d9dbe
commit
c67fd79a83
|
@ -4,6 +4,9 @@ The Benchmark contrib package contains code for benchmarking Lucene in a variety
|
|||
|
||||
$Id:$
|
||||
|
||||
8/15/07
|
||||
LUCENE-
|
||||
|
||||
8/9/07
|
||||
LUCENE-971: Change enwiki tasks to a doc maker (extending
|
||||
LineDocMaker) that directly processes the Wikipedia XML and produces
|
||||
|
|
|
@ -0,0 +1,80 @@
|
|||
#/**
|
||||
# * Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# * contributor license agreements. See the NOTICE file distributed with
|
||||
# * this work for additional information regarding copyright ownership.
|
||||
# * The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# * (the "License"); you may not use this file except in compliance with
|
||||
# * the License. You may obtain a copy of the License at
|
||||
# *
|
||||
# * http://www.apache.org/licenses/LICENSE-2.0
|
||||
# *
|
||||
# * Unless required by applicable law or agreed to in writing, software
|
||||
# * distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# * See the License for the specific language governing permissions and
|
||||
# * limitations under the License.
|
||||
# */
|
||||
# -------------------------------------------------------------------------------------
|
||||
# multi val params are iterated by NewRound's, added to reports, start with column name.
|
||||
|
||||
merge.factor=mrg:10
|
||||
#:100:10:100
|
||||
max.buffered=buf:10
|
||||
#:10:100:100
|
||||
compound=true
|
||||
|
||||
analyzer=org.apache.lucene.analysis.standard.StandardAnalyzer
|
||||
directory=FSDirectory
|
||||
#directory=RamDirectory
|
||||
|
||||
doc.stored=true
|
||||
doc.tokenized=true
|
||||
doc.term.vector=false
|
||||
doc.add.log.step=500
|
||||
|
||||
docs.dir=reuters-out
|
||||
#docs.dir=reuters-111
|
||||
|
||||
#doc.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleDocMaker
|
||||
doc.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersDocMaker
|
||||
|
||||
#query.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleQueryMaker
|
||||
query.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker
|
||||
|
||||
# task at this depth or less would print when they start
|
||||
task.max.depth.log=2
|
||||
|
||||
log.queries=true
|
||||
# -------------------------------------------------------------------------------------
|
||||
|
||||
{ "Rounds"
|
||||
|
||||
ResetSystemErase
|
||||
#If the analyzer is in o.a.l.analysis, then just the classname can be used, otherwise the FQN must be used
|
||||
#Standard Analyzer can be shortened to standard.StandardAnalyzer
|
||||
{"NewAnalyzer" NewAnalyzer(WhitespaceAnalyzer, SimpleAnalyzer, StopAnalyzer, standard.StandardAnalyzer) >
|
||||
{ "Populate"
|
||||
CreateIndex
|
||||
{ "MAddDocs" AddDoc > : 2000
|
||||
Optimize
|
||||
CloseIndex
|
||||
}
|
||||
|
||||
OpenReader
|
||||
{ "SearchSameRdr" Search > : 5000
|
||||
CloseReader
|
||||
|
||||
{ "WarmNewRdr" Warm > : 50
|
||||
|
||||
{ "SrchNewRdr" Search > : 500
|
||||
|
||||
{ "SrchTrvNewRdr" SearchTrav(1000) > : 300
|
||||
|
||||
{ "SrchTrvRetNewRdr" SearchTravRet(2000) > : 100
|
||||
|
||||
NewRound
|
||||
|
||||
} : 4
|
||||
|
||||
RepSumByName
|
||||
RepSumByPrefRound MAddDocs
|
|
@ -17,10 +17,6 @@ package org.apache.lucene.benchmark.byTask;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.File;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
|
||||
import org.apache.lucene.benchmark.byTask.feeds.HTMLParser;
|
||||
|
@ -28,13 +24,17 @@ import org.apache.lucene.benchmark.byTask.feeds.QueryMaker;
|
|||
import org.apache.lucene.benchmark.byTask.stats.Points;
|
||||
import org.apache.lucene.benchmark.byTask.tasks.ReadTask;
|
||||
import org.apache.lucene.benchmark.byTask.tasks.SearchTask;
|
||||
import org.apache.lucene.benchmark.byTask.utils.Config;
|
||||
import org.apache.lucene.benchmark.byTask.utils.FileUtils;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.FSDirectory;
|
||||
import org.apache.lucene.store.RAMDirectory;
|
||||
import org.apache.lucene.benchmark.byTask.utils.Config;
|
||||
import org.apache.lucene.benchmark.byTask.utils.FileUtils;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
|
||||
|
||||
/**
|
||||
|
@ -207,6 +207,11 @@ public class PerfRunData {
|
|||
return analyzer;
|
||||
}
|
||||
|
||||
|
||||
public void setAnalyzer(Analyzer analyzer) {
|
||||
this.analyzer = analyzer;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return Returns the docMaker.
|
||||
*/
|
||||
|
|
|
@ -0,0 +1,84 @@
|
|||
package org.apache.lucene.benchmark.byTask.tasks;
|
||||
/**
|
||||
* Copyright 2005 The Apache Software Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.benchmark.byTask.PerfRunData;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.StringTokenizer;
|
||||
|
||||
/**
|
||||
* Create a new {@link org.apache.lucene.analysis.Analyzer} and set it it in the getRunData() for use by all future tasks.
|
||||
*
|
||||
*/
|
||||
public class NewAnalyzerTask extends PerfTask {
|
||||
private List/*<String>*/ analyzerClassNames;
|
||||
private int current;
|
||||
|
||||
public NewAnalyzerTask(PerfRunData runData) {
|
||||
super(runData);
|
||||
analyzerClassNames = new ArrayList();
|
||||
}
|
||||
|
||||
public int doLogic() throws IOException {
|
||||
String className = null;
|
||||
try {
|
||||
if (current >= analyzerClassNames.size())
|
||||
{
|
||||
current = 0;
|
||||
}
|
||||
className = (String) analyzerClassNames.get(current++);
|
||||
if (className == null || className.equals(""))
|
||||
{
|
||||
className = "org.apache.lucene.analysis.standard.StandardAnalyzer";
|
||||
}
|
||||
if (className.indexOf(".") == -1 || className.startsWith("standard."))//there is no package name, assume o.a.l.analysis
|
||||
{
|
||||
className = "org.apache.lucene.analysis." + className;
|
||||
}
|
||||
getRunData().setAnalyzer((Analyzer) Class.forName(className).newInstance());
|
||||
System.out.println("Changed Analyzer to: " + className);
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException("Error creating Analyzer: " + className, e);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the params (analyzerClassName only), Comma-separate list of Analyzer class names. If the Analyzer lives in
|
||||
* org.apache.lucene.analysis, the name can be shortened by dropping the o.a.l.a part of the Fully Qualified Class Name.
|
||||
* <p/>
|
||||
* Example Declaration: {"NewAnalyzer" NewAnalyzer(WhitespaceAnalyzer, SimpleAnalyzer, StopAnalyzer, standard.StandardAnalyzer) >
|
||||
* @param params analyzerClassName, or empty for the StandardAnalyzer
|
||||
*/
|
||||
public void setParams(String params) {
|
||||
super.setParams(params);
|
||||
for (StringTokenizer tokenizer = new StringTokenizer(params, ","); tokenizer.hasMoreTokens();) {
|
||||
String s = tokenizer.nextToken();
|
||||
analyzerClassNames.add(s.trim());
|
||||
}
|
||||
}
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see org.apache.lucene.benchmark.byTask.tasks.PerfTask#supportsParams()
|
||||
*/
|
||||
public boolean supportsParams() {
|
||||
return true;
|
||||
}
|
||||
}
|
|
@ -82,7 +82,8 @@ public class Format {
|
|||
* @return formatted string.
|
||||
*/
|
||||
public static String format(String s, String col) {
|
||||
return (s + padd).substring(0, col.length());
|
||||
String s1 = (s + padd);
|
||||
return s1.substring(0, Math.min(col.length(), s1.length()));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
Loading…
Reference in New Issue