mirror of https://github.com/apache/lucene.git
LUCENE-2343: add support for benchmarking collectors
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@927178 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
03216a150e
commit
eb6e13fe9e
|
@ -2,7 +2,10 @@ Lucene Benchmark Contrib Change Log
|
|||
|
||||
The Benchmark contrib package contains code for benchmarking Lucene in a variety of ways.
|
||||
|
||||
2/21/2020
|
||||
3/24/2010
|
||||
LUCENE-2343: Added support for benchmarking collectors. (Grant Ingersoll, Shai Erera)
|
||||
|
||||
2/21/2010
|
||||
LUCENE-2254: Add support to the quality package for running
|
||||
experiments with any combination of Title, Description, and Narrative.
|
||||
(Robert Muir)
|
||||
|
|
|
@ -0,0 +1,91 @@
|
|||
#/**
|
||||
# * Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# * contributor license agreements. See the NOTICE file distributed with
|
||||
# * this work for additional information regarding copyright ownership.
|
||||
# * The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# * (the "License"); you may not use this file except in compliance with
|
||||
# * the License. You may obtain a copy of the License at
|
||||
# *
|
||||
# * http://www.apache.org/licenses/LICENSE-2.0
|
||||
# *
|
||||
# * Unless required by applicable law or agreed to in writing, software
|
||||
# * distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# * See the License for the specific language governing permissions and
|
||||
# * limitations under the License.
|
||||
# */
|
||||
# -------------------------------------------------------------------------------------
|
||||
# multi val params are iterated by NewRound's, added to reports, start with column name.
|
||||
|
||||
# collector.class can be:
|
||||
# Fully Qualified Class Name of a Collector with an empty constructor
|
||||
# topScoreDocOrdered - Creates a TopScoreDocCollector that requires in order docs
|
||||
# topScoreDocUnordered - Like above, but allows out of order
|
||||
collector.class=coll:topScoreDocOrdered:topScoreDocUnordered:topScoreDocOrdered:topScoreDocUnordered
|
||||
|
||||
analyzer=org.apache.lucene.analysis.WhitespaceAnalyzer
|
||||
directory=FSDirectory
|
||||
#directory=RamDirectory
|
||||
|
||||
doc.stored=true
|
||||
doc.tokenized=true
|
||||
doc.term.vector=false
|
||||
log.step=100000
|
||||
|
||||
search.num.hits=100000
|
||||
|
||||
content.source=org.apache.lucene.benchmark.byTask.feeds.LongToEnglishContentSource
|
||||
|
||||
|
||||
query.maker=org.apache.lucene.benchmark.byTask.feeds.LongToEnglishQueryMaker
|
||||
|
||||
# tasks at this depth or less print a message when they start
|
||||
task.max.depth.log=2
|
||||
|
||||
log.queries=true
|
||||
# -------------------------------------------------------------------------------------
|
||||
|
||||
{ "Rounds"
|
||||
|
||||
ResetSystemErase
|
||||
|
||||
{ "Populate"
|
||||
CreateIndex
|
||||
{ "MAddDocs" AddDoc } : 200000
|
||||
Optimize
|
||||
CloseIndex
|
||||
}
|
||||
|
||||
OpenReader
|
||||
{ "topDocs" SearchWithCollector > : 10
|
||||
CloseReader
|
||||
|
||||
# OpenReader
|
||||
#uses an array of search.num.hits size, but can also take in a parameter
|
||||
# { "psc" SearchWithPostSortCollector > : 10
|
||||
# { "psc100" SearchWithPostSortCollector(100) > : 10
|
||||
# { "psc1000" SearchWithPostSortCollector(1000) > : 10
|
||||
# { "psc10000" SearchWithPostSortCollector(10000) > : 10
|
||||
# { "psc50000" SearchWithPostSortCollector(50000) > : 10
|
||||
# CloseReader
|
||||
|
||||
RepSumByPref topDocs
|
||||
# RepSumByPref psc
|
||||
# RepSumByPref psc100
|
||||
# RepSumByPref psc1000
|
||||
# RepSumByPref psc10000
|
||||
# RepSumByPref psc50000
|
||||
|
||||
NewRound
|
||||
|
||||
} : 4
|
||||
|
||||
#RepSumByNameRound
|
||||
#RepSumByName
|
||||
#RepSumByPrefRound topDocs
|
||||
#RepSumByPrefRound psc
|
||||
#RepSumByPrefRound psc100
|
||||
#RepSumByPrefRound psc1000
|
||||
#RepSumByPrefRound psc10000
|
||||
#RepSumByPrefRound psc50000
|
||||
|
|
@ -0,0 +1,91 @@
|
|||
#/**
|
||||
# * Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# * contributor license agreements. See the NOTICE file distributed with
|
||||
# * this work for additional information regarding copyright ownership.
|
||||
# * The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# * (the "License"); you may not use this file except in compliance with
|
||||
# * the License. You may obtain a copy of the License at
|
||||
# *
|
||||
# * http://www.apache.org/licenses/LICENSE-2.0
|
||||
# *
|
||||
# * Unless required by applicable law or agreed to in writing, software
|
||||
# * distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# * See the License for the specific language governing permissions and
|
||||
# * limitations under the License.
|
||||
# */
|
||||
# -------------------------------------------------------------------------------------
|
||||
# multi val params are iterated by NewRound's, added to reports, start with column name.
|
||||
|
||||
# collector.class can be:
|
||||
# Fully Qualified Class Name of a Collector with an empty constructor
|
||||
# topScoreDocOrdered - Creates a TopScoreDocCollector that requires in order docs
|
||||
# topScoreDocUnordered - Like above, but allows out of order
|
||||
collector.class=coll:topScoreDocOrdered:topScoreDocUnordered:topScoreDocOrdered:topScoreDocUnordered
|
||||
|
||||
analyzer=org.apache.lucene.analysis.WhitespaceAnalyzer
|
||||
directory=FSDirectory
|
||||
#directory=RamDirectory
|
||||
|
||||
doc.stored=true
|
||||
doc.tokenized=true
|
||||
doc.term.vector=false
|
||||
log.step=100000
|
||||
|
||||
search.num.hits=1000000
|
||||
|
||||
content.source=org.apache.lucene.benchmark.byTask.feeds.LongToEnglishContentSource
|
||||
|
||||
|
||||
query.maker=org.apache.lucene.benchmark.byTask.feeds.LongToEnglishQueryMaker
|
||||
|
||||
# tasks at this depth or less print a message when they start
|
||||
task.max.depth.log=2
|
||||
|
||||
log.queries=true
|
||||
# -------------------------------------------------------------------------------------
|
||||
|
||||
{ "Rounds"
|
||||
|
||||
ResetSystemErase
|
||||
|
||||
{ "Populate"
|
||||
CreateIndex
|
||||
{ "MAddDocs" AddDoc } : 2000000
|
||||
Optimize
|
||||
CloseIndex
|
||||
}
|
||||
|
||||
OpenReader
|
||||
{ "topDocs" SearchWithCollector > : 10
|
||||
CloseReader
|
||||
|
||||
# OpenReader
|
||||
#uses an array of search.num.hits size, but can also take in a parameter
|
||||
# { "psc" SearchWithPostSortCollector > : 10
|
||||
# { "psc100" SearchWithPostSortCollector(100) > : 10
|
||||
# { "psc1000" SearchWithPostSortCollector(1000) > : 10
|
||||
# { "psc10000" SearchWithPostSortCollector(10000) > : 10
|
||||
# { "psc50000" SearchWithPostSortCollector(50000) > : 10
|
||||
# CloseReader
|
||||
|
||||
RepSumByPref topDocs
|
||||
# RepSumByPref psc
|
||||
# RepSumByPref psc100
|
||||
# RepSumByPref psc1000
|
||||
# RepSumByPref psc10000
|
||||
# RepSumByPref psc50000
|
||||
|
||||
NewRound
|
||||
|
||||
} : 4
|
||||
|
||||
#RepSumByNameRound
|
||||
#RepSumByName
|
||||
#RepSumByPrefRound topDocs
|
||||
#RepSumByPrefRound psc
|
||||
#RepSumByPrefRound psc100
|
||||
#RepSumByPrefRound psc1000
|
||||
#RepSumByPrefRound psc10000
|
||||
#RepSumByPrefRound psc50000
|
||||
|
|
@ -30,10 +30,12 @@ import org.apache.lucene.benchmark.byTask.feeds.QueryMaker;
|
|||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Fieldable;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.search.Collector;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.search.MultiTermQuery;
|
||||
import org.apache.lucene.search.TopFieldCollector;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.TopScoreDocCollector;
|
||||
import org.apache.lucene.search.Weight;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
|
@ -105,9 +107,10 @@ public abstract class ReadTask extends PerfTask {
|
|||
res++;
|
||||
Query q = queryMaker.makeQuery();
|
||||
Sort sort = getSort();
|
||||
TopDocs hits;
|
||||
TopDocs hits = null;
|
||||
final int numHits = numHits();
|
||||
if (numHits > 0) {
|
||||
if (withCollector() == false) {
|
||||
if (sort != null) {
|
||||
Weight w = q.weight(searcher);
|
||||
TopFieldCollector collector = TopFieldCollector.create(sort, numHits,
|
||||
|
@ -119,9 +122,14 @@ public abstract class ReadTask extends PerfTask {
|
|||
} else {
|
||||
hits = searcher.search(q, numHits);
|
||||
}
|
||||
} else {
|
||||
Collector collector = createCollector();
|
||||
searcher.search(q, null, collector);
|
||||
//hits = collector.topDocs();
|
||||
}
|
||||
|
||||
final String printHitsField = getRunData().getConfig().get("print.hits.field", null);
|
||||
if (printHitsField != null && printHitsField.length() > 0) {
|
||||
if (hits != null && printHitsField != null && printHitsField.length() > 0) {
|
||||
if (q instanceof MultiTermQuery) {
|
||||
System.out.println("MultiTermQuery term count = " + ((MultiTermQuery) q).getTotalNumberOfTerms());
|
||||
}
|
||||
|
@ -177,6 +185,9 @@ public abstract class ReadTask extends PerfTask {
|
|||
return res;
|
||||
}
|
||||
|
||||
protected Collector createCollector() throws Exception {
|
||||
return TopScoreDocCollector.create(numHits(), true);
|
||||
}
|
||||
|
||||
|
||||
protected Document retrieveDoc(IndexReader ir, int id) throws IOException {
|
||||
|
@ -193,6 +204,10 @@ public abstract class ReadTask extends PerfTask {
|
|||
*/
|
||||
public abstract boolean withSearch();
|
||||
|
||||
public boolean withCollector(){
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Return true if warming should be performed.
|
||||
|
|
|
@ -0,0 +1,95 @@
|
|||
package org.apache.lucene.benchmark.byTask.tasks;
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.benchmark.byTask.PerfRunData;
|
||||
import org.apache.lucene.benchmark.byTask.feeds.QueryMaker;
|
||||
import org.apache.lucene.benchmark.byTask.utils.Config;
|
||||
import org.apache.lucene.search.Collector;
|
||||
import org.apache.lucene.search.TopScoreDocCollector;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Does search w/ a custom collector
|
||||
*/
|
||||
public class SearchWithCollectorTask extends SearchTask {
|
||||
|
||||
protected String clnName;
|
||||
|
||||
public SearchWithCollectorTask(PerfRunData runData) {
|
||||
super(runData);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setup() throws Exception {
|
||||
super.setup();
|
||||
//check to make sure either the doc is being stored
|
||||
PerfRunData runData = getRunData();
|
||||
Config config = runData.getConfig();
|
||||
clnName = config.get("collector.class", "");
|
||||
}
|
||||
|
||||
|
||||
|
||||
@Override
|
||||
public boolean withCollector() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Collector createCollector() throws Exception {
|
||||
Collector collector = null;
|
||||
if (clnName.equalsIgnoreCase("topScoreDocOrdered") == true) {
|
||||
collector = TopScoreDocCollector.create(numHits(), true);
|
||||
} else if (clnName.equalsIgnoreCase("topScoreDocUnOrdered") == true) {
|
||||
collector = TopScoreDocCollector.create(numHits(), false);
|
||||
} else if (clnName.length() > 0){
|
||||
collector = Class.forName(clnName).asSubclass(Collector.class).newInstance();
|
||||
|
||||
} else {
|
||||
collector = super.createCollector();
|
||||
}
|
||||
return collector;
|
||||
}
|
||||
|
||||
@Override
|
||||
public QueryMaker getQueryMaker() {
|
||||
return getRunData().getQueryMaker(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean withRetrieve() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean withSearch() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean withTraverse() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean withWarm() {
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
|
@ -22,19 +22,19 @@ import java.io.ByteArrayInputStream;
|
|||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Properties;
|
||||
import java.util.StringTokenizer;
|
||||
|
||||
/**
|
||||
* Perf run configuration properties.
|
||||
* <p>
|
||||
* <p/>
|
||||
* Numeric property containing ":", e.g. "10:100:5" is interpreted
|
||||
* as array of numeric values. It is extracted once, on first use, and
|
||||
* maintain a round number to return the appropriate value.
|
||||
* <p>
|
||||
* <p/>
|
||||
* The config property "work.dir" tells where is the root of
|
||||
* docs data dirs and indexes dirs. It is set to either of: <ul>
|
||||
* <li>value supplied for it in the alg file;</li>
|
||||
|
@ -54,6 +54,7 @@ public class Config {
|
|||
|
||||
/**
|
||||
* Read both algorithm and config properties.
|
||||
*
|
||||
* @param algReader from where to read algorithm and config properties.
|
||||
* @throws IOException
|
||||
*/
|
||||
|
@ -121,17 +122,38 @@ public class Config {
|
|||
|
||||
/**
|
||||
* Return a string property.
|
||||
*
|
||||
* @param name name of property.
|
||||
* @param dflt default value.
|
||||
* @return a string property.
|
||||
*/
|
||||
public String get(String name, String dflt) {
|
||||
return props.getProperty(name,dflt);
|
||||
String vals[] = (String[]) valByRound.get(name);
|
||||
if (vals != null) {
|
||||
return vals[roundNumber % vals.length];
|
||||
}
|
||||
// done if not by round
|
||||
String sval = props.getProperty(name, dflt);
|
||||
if (sval == null) {
|
||||
return null;
|
||||
}
|
||||
if (sval.indexOf(":") < 0) {
|
||||
return sval;
|
||||
}
|
||||
// first time this prop is extracted by round
|
||||
int k = sval.indexOf(":");
|
||||
String colName = sval.substring(0, k);
|
||||
sval = sval.substring(k + 1);
|
||||
colForValByRound.put(name, colName);
|
||||
vals = propToStringArray(sval);
|
||||
valByRound.put(name, vals);
|
||||
return vals[roundNumber % vals.length];
|
||||
}
|
||||
|
||||
/**
|
||||
* Set a property.
|
||||
* Note: once a multiple values property is set, it can no longer be modified.
|
||||
*
|
||||
* @param name name of property.
|
||||
* @param value either single or multiple property value (multiple values are separated by ":")
|
||||
* @throws Exception
|
||||
|
@ -148,6 +170,7 @@ public class Config {
|
|||
* If the property contain ":", e.g. "10:100:5", it is interpreted
|
||||
* as array of ints. It is extracted once, on first call
|
||||
* to get() it, and a by-round-value is returned.
|
||||
*
|
||||
* @param name name of property
|
||||
* @param dflt default value
|
||||
* @return a int property.
|
||||
|
@ -178,6 +201,7 @@ public class Config {
|
|||
* If the property contain ":", e.g. "10:100:5", it is interpreted
|
||||
* as array of doubles. It is extracted once, on first call
|
||||
* to get() it, and a by-round-value is returned.
|
||||
*
|
||||
* @param name name of property
|
||||
* @param dflt default value
|
||||
* @return a double property.
|
||||
|
@ -208,6 +232,7 @@ public class Config {
|
|||
* If the property contains ":", e.g. "true:true:false", it is interpreted
|
||||
* as array of booleans. It is extracted once, on first call
|
||||
* to get() it, and a by-round-value is returned.
|
||||
*
|
||||
* @param name name of property
|
||||
* @param dflt default value
|
||||
* @return a boolean property.
|
||||
|
@ -235,6 +260,7 @@ public class Config {
|
|||
|
||||
/**
|
||||
* Increment the round number, for config values that are extracted by round number.
|
||||
*
|
||||
* @return the new round number.
|
||||
*/
|
||||
public int newRound() {
|
||||
|
@ -257,8 +283,12 @@ public class Config {
|
|||
int n1 = (roundNumber - 1) % ad.length;
|
||||
int n2 = roundNumber % ad.length;
|
||||
sb.append(" ").append(name).append(":").append(ad[n1]).append("-->").append(ad[n2]);
|
||||
}
|
||||
else {
|
||||
} else if (a instanceof String[]) {
|
||||
String ad[] = (String[]) a;
|
||||
int n1 = (roundNumber - 1) % ad.length;
|
||||
int n2 = roundNumber % ad.length;
|
||||
sb.append(" ").append(name).append(":").append(ad[n1]).append("-->").append(ad[n2]);
|
||||
} else {
|
||||
boolean ab[] = (boolean[]) a;
|
||||
int n1 = (roundNumber - 1) % ab.length;
|
||||
int n2 = roundNumber % ab.length;
|
||||
|
@ -274,6 +304,20 @@ public class Config {
|
|||
return roundNumber;
|
||||
}
|
||||
|
||||
private String[] propToStringArray(String s) {
|
||||
if (s.indexOf(":") < 0) {
|
||||
return new String[]{s};
|
||||
}
|
||||
|
||||
ArrayList<String> a = new ArrayList<String>();
|
||||
StringTokenizer st = new StringTokenizer(s, ":");
|
||||
while (st.hasMoreTokens()) {
|
||||
String t = st.nextToken();
|
||||
a.add(t);
|
||||
}
|
||||
return (String[]) a.toArray(new String[a.size()]);
|
||||
}
|
||||
|
||||
// extract properties to array, e.g. for "10:100:5" return int[]{10,100,5}.
|
||||
private int[] propToIntArray(String s) {
|
||||
if (s.indexOf(":") < 0) {
|
||||
|
@ -367,13 +411,15 @@ public class Config {
|
|||
int ai[] = (int[]) a;
|
||||
int n = roundNum % ai.length;
|
||||
sb.append(Format.format(ai[n], template));
|
||||
}
|
||||
else if (a instanceof double[]) {
|
||||
} else if (a instanceof double[]) {
|
||||
double ad[] = (double[]) a;
|
||||
int n = roundNum % ad.length;
|
||||
sb.append(Format.format(2, ad[n], template));
|
||||
}
|
||||
else {
|
||||
} else if (a instanceof String[]) {
|
||||
String ad[] = (String[]) a;
|
||||
int n = roundNum % ad.length;
|
||||
sb.append(ad[n]);
|
||||
} else {
|
||||
boolean ab[] = (boolean[]) a;
|
||||
int n = roundNum % ab.length;
|
||||
sb.append(Format.formatPaddLeft("" + ab[n], template));
|
||||
|
|
Loading…
Reference in New Issue