LUCENE-10100: configuration items of the alg file are adapted to the 9.0 branch (#301)

This commit is contained in:
xiaoshi 2023-11-02 19:02:31 +08:00 committed by GitHub
parent cbb5b6e331
commit d62fb5309e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
20 changed files with 43 additions and 40 deletions

View File

@ -32,8 +32,8 @@ doc.tokenized=true
doc.term.vector=false
log.step=500
docs.dir=reuters-out
#docs.dir=reuters-111
work.dir=data
docs.dir=reuters21578
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource

View File

@ -21,7 +21,7 @@
# Fully Qualified Class Name of a Collector with an empty constructor
# topScoreDocOrdered - Creates a TopScoreDocCollector that requires in order docs
# topScoreDocUnordered - Like above, but allows out of order
collector.class=coll:topScoreDocOrdered:topScoreDocUnordered:topScoreDocOrdered:topScoreDocUnordered
collector.class=coll:topScoreDoc
analyzer=org.apache.lucene.analysis.core.WhitespaceAnalyzer
directory=FSDirectory

View File

@ -21,7 +21,7 @@
# Fully Qualified Class Name of a Collector with an empty constructor
# topScoreDocOrdered - Creates a TopScoreDocCollector that requires in order docs
# topScoreDocUnordered - Like above, but allows out of order
collector.class=coll:topScoreDocOrdered:topScoreDocUnordered:topScoreDocOrdered:topScoreDocUnordered
collector.class=coll:topScoreDoc
analyzer=org.apache.lucene.analysis.core.WhitespaceAnalyzer
directory=FSDirectory

View File

@ -37,8 +37,8 @@ doc.term.vector=vector:true:true:false:false
log.step=500
log.step.DeleteDoc=100
docs.dir=reuters-out
#docs.dir=reuters-111
work.dir=data
docs.dir=reuters21578
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource

View File

@ -20,7 +20,8 @@
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
doc.tokenized=false
doc.body.tokenized=true
docs.dir=reuters-out
work.dir=data
docs.dir=reuters21578
-AnalyzerFactory(name:original-porter-stemmer,StandardTokenizer,
EnglishPossessiveFilter,LowerCaseFilter,StopFilter,

View File

@ -30,7 +30,8 @@ doc.tokenized=true
doc.term.vector=false
log.step=1000
docs.dir=reuters-out
work.dir=data
docs.dir=reuters21578
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource

View File

@ -30,7 +30,8 @@ doc.term.vector.offsets=false
doc.term.vector.positions=false
log.step=2000
docs.dir=reuters-out
work.dir=data
docs.dir=reuters21578
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource

View File

@ -32,8 +32,8 @@ doc.tokenized=true
doc.term.vector=false
log.step=2000
docs.dir=reuters-out
#docs.dir=reuters-111
work.dir=data
docs.dir=reuters21578
#content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource

View File

@ -32,8 +32,8 @@ doc.tokenized=true
doc.term.vector=false
log.step=2000
docs.dir=reuters-out
#docs.dir=reuters-111
work.dir=data
docs.dir=reuters21578
#content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource

View File

@ -32,8 +32,8 @@ doc.tokenized=true
doc.term.vector=false
log.step=2000
docs.dir=reuters-out
#docs.dir=reuters-111
work.dir=data
docs.dir=reuters21578
#content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource

View File

@ -32,8 +32,8 @@ doc.tokenized=true
doc.term.vector=false
log.step=2000
docs.dir=reuters-out
#docs.dir=reuters-111
work.dir=data
docs.dir=reuters21578
#content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource

View File

@ -31,8 +31,8 @@ doc.tokenized=true
doc.term.vector=false
log.step=500
docs.dir=reuters-out
#docs.dir=reuters-111
work.dir=data
docs.dir=reuters21578
#content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource

View File

@ -42,8 +42,8 @@ doc.tokenized=true
doc.term.vector=false
log.step=500
docs.dir=reuters-out
#docs.dir=reuters-111
work.dir=data
docs.dir=reuters21578
content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource
#content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource

View File

@ -16,7 +16,8 @@
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
doc.tokenized=false
doc.body.tokenized=true
docs.dir=reuters-out
work.dir=data
docs.dir=reuters21578
log.step=1000
-AnalyzerFactory(name:shingle-bigrams-unigrams,

View File

@ -30,7 +30,8 @@ doc.tokenized=true
doc.term.vector=false
log.step=500
docs.dir=reuters-out
work.dir=data
docs.dir=reuters21578
#docs.dir=reuters-111
content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource

View File

@ -31,7 +31,8 @@ doc.tokenized=true
doc.term.vector=false
log.step=100000
docs.dir=reuters-out
work.dir=data
docs.dir=reuters21578
content.source=org.apache.lucene.benchmark.byTask.feeds.SortableSingleDocSource

View File

@ -31,8 +31,8 @@ doc.tokenized=true
doc.term.vector=false
log.step=2000
docs.dir=reuters-out
#docs.dir=reuters-111
work.dir=data
docs.dir=reuters21578
#content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource

View File

@ -31,8 +31,8 @@ doc.tokenized=true
doc.term.vector=false
log.step=2000
docs.dir=reuters-out
#docs.dir=reuters-111
work.dir=data
docs.dir=reuters21578
#content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource

View File

@ -18,7 +18,8 @@
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
doc.tokenized=false
doc.body.tokenized=true
docs.dir=reuters-out
work.dir=data
docs.dir=reuters21578
-AnalyzerFactory(name:WhitespaceTokenizer, WhitespaceTokenizer(rule:java))

View File

@ -23,9 +23,9 @@ import java.lang.reflect.Constructor;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.utils.AnalyzerFactory;
import org.apache.lucene.util.Version;
/**
* Create a new {@link org.apache.lucene.analysis.Analyzer} and set it in the getRunData() for use
@ -42,17 +42,13 @@ public class NewAnalyzerTask extends PerfTask {
public static final Analyzer createAnalyzer(String className) throws Exception {
final Class<? extends Analyzer> clazz = Class.forName(className).asSubclass(Analyzer.class);
try {
// first try to use a ctor with version parameter (needed for many new Analyzers that have no
// default one anymore)
Constructor<? extends Analyzer> cnstr = clazz.getConstructor(Version.class);
return cnstr.newInstance(Version.LATEST);
} catch (
@SuppressWarnings("unused")
NoSuchMethodException nsme) {
// otherwise use default ctor
return clazz.getConstructor().newInstance();
Constructor<? extends Analyzer> cnstr;
if (className.equals("org.apache.lucene.analysis.core.StopAnalyzer")) {
cnstr = clazz.getConstructor(CharArraySet.class);
return cnstr.newInstance(CharArraySet.EMPTY_SET);
}
cnstr = clazz.getConstructor();
return cnstr.newInstance();
}
@Override