LUCENE-2343: add support for benchmarking collectors

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@927178 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Grant Ingersoll 2010-03-24 20:49:44 +00:00
parent 03216a150e
commit eb6e13fe9e
6 changed files with 463 additions and 122 deletions

View File

@ -2,7 +2,10 @@ Lucene Benchmark Contrib Change Log
The Benchmark contrib package contains code for benchmarking Lucene in a variety of ways. The Benchmark contrib package contains code for benchmarking Lucene in a variety of ways.
2/21/2010 3/24/2010
LUCENE-2343: Added support for benchmarking collectors. (Grant Ingersoll, Shai Erera)
2/21/2010
LUCENE-2254: Add support to the quality package for running LUCENE-2254: Add support to the quality package for running
experiments with any combination of Title, Description, and Narrative. experiments with any combination of Title, Description, and Narrative.
(Robert Muir) (Robert Muir)

View File

@ -0,0 +1,91 @@
#/**
# * Licensed to the Apache Software Foundation (ASF) under one or more
# * contributor license agreements. See the NOTICE file distributed with
# * this work for additional information regarding copyright ownership.
# * The ASF licenses this file to You under the Apache License, Version 2.0
# * (the "License"); you may not use this file except in compliance with
# * the License. You may obtain a copy of the License at
# *
# * http://www.apache.org/licenses/LICENSE-2.0
# *
# * Unless required by applicable law or agreed to in writing, software
# * distributed under the License is distributed on an "AS IS" BASIS,
# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# * See the License for the specific language governing permissions and
# * limitations under the License.
# */
# -------------------------------------------------------------------------------------
# multi val params are iterated by NewRound's, added to reports, start with column name.
# collector.class can be:
# Fully Qualified Class Name of a Collector with an empty constructor
# topScoreDocOrdered - Creates a TopScoreDocCollector that requires in order docs
# topScoreDocUnordered - Like above, but allows out of order
collector.class=coll:topScoreDocOrdered:topScoreDocUnordered:topScoreDocOrdered:topScoreDocUnordered
analyzer=org.apache.lucene.analysis.WhitespaceAnalyzer
directory=FSDirectory
#directory=RamDirectory
doc.stored=true
doc.tokenized=true
doc.term.vector=false
log.step=100000
search.num.hits=100000
content.source=org.apache.lucene.benchmark.byTask.feeds.LongToEnglishContentSource
query.maker=org.apache.lucene.benchmark.byTask.feeds.LongToEnglishQueryMaker
# tasks at this depth or less print a message when they start
task.max.depth.log=2
log.queries=true
# -------------------------------------------------------------------------------------
{ "Rounds"
ResetSystemErase
{ "Populate"
CreateIndex
{ "MAddDocs" AddDoc } : 200000
Optimize
CloseIndex
}
OpenReader
{ "topDocs" SearchWithCollector > : 10
CloseReader
# OpenReader
#uses an array of search.num.hits size, but can also take in a parameter
# { "psc" SearchWithPostSortCollector > : 10
# { "psc100" SearchWithPostSortCollector(100) > : 10
# { "psc1000" SearchWithPostSortCollector(1000) > : 10
# { "psc10000" SearchWithPostSortCollector(10000) > : 10
# { "psc50000" SearchWithPostSortCollector(50000) > : 10
# CloseReader
RepSumByPref topDocs
# RepSumByPref psc
# RepSumByPref psc100
# RepSumByPref psc1000
# RepSumByPref psc10000
# RepSumByPref psc50000
NewRound
} : 4
#RepSumByNameRound
#RepSumByName
#RepSumByPrefRound topDocs
#RepSumByPrefRound psc
#RepSumByPrefRound psc100
#RepSumByPrefRound psc1000
#RepSumByPrefRound psc10000
#RepSumByPrefRound psc50000

View File

@ -0,0 +1,91 @@
#/**
# * Licensed to the Apache Software Foundation (ASF) under one or more
# * contributor license agreements. See the NOTICE file distributed with
# * this work for additional information regarding copyright ownership.
# * The ASF licenses this file to You under the Apache License, Version 2.0
# * (the "License"); you may not use this file except in compliance with
# * the License. You may obtain a copy of the License at
# *
# * http://www.apache.org/licenses/LICENSE-2.0
# *
# * Unless required by applicable law or agreed to in writing, software
# * distributed under the License is distributed on an "AS IS" BASIS,
# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# * See the License for the specific language governing permissions and
# * limitations under the License.
# */
# -------------------------------------------------------------------------------------
# multi val params are iterated by NewRound's, added to reports, start with column name.
# collector.class can be:
# Fully Qualified Class Name of a Collector with an empty constructor
# topScoreDocOrdered - Creates a TopScoreDocCollector that requires in order docs
# topScoreDocUnordered - Like above, but allows out of order
collector.class=coll:topScoreDocOrdered:topScoreDocUnordered:topScoreDocOrdered:topScoreDocUnordered
analyzer=org.apache.lucene.analysis.WhitespaceAnalyzer
directory=FSDirectory
#directory=RamDirectory
doc.stored=true
doc.tokenized=true
doc.term.vector=false
log.step=100000
search.num.hits=1000000
content.source=org.apache.lucene.benchmark.byTask.feeds.LongToEnglishContentSource
query.maker=org.apache.lucene.benchmark.byTask.feeds.LongToEnglishQueryMaker
# tasks at this depth or less print a message when they start
task.max.depth.log=2
log.queries=true
# -------------------------------------------------------------------------------------
{ "Rounds"
ResetSystemErase
{ "Populate"
CreateIndex
{ "MAddDocs" AddDoc } : 2000000
Optimize
CloseIndex
}
OpenReader
{ "topDocs" SearchWithCollector > : 10
CloseReader
# OpenReader
#uses an array of search.num.hits size, but can also take in a parameter
# { "psc" SearchWithPostSortCollector > : 10
# { "psc100" SearchWithPostSortCollector(100) > : 10
# { "psc1000" SearchWithPostSortCollector(1000) > : 10
# { "psc10000" SearchWithPostSortCollector(10000) > : 10
# { "psc50000" SearchWithPostSortCollector(50000) > : 10
# CloseReader
RepSumByPref topDocs
# RepSumByPref psc
# RepSumByPref psc100
# RepSumByPref psc1000
# RepSumByPref psc10000
# RepSumByPref psc50000
NewRound
} : 4
#RepSumByNameRound
#RepSumByName
#RepSumByPrefRound topDocs
#RepSumByPrefRound psc
#RepSumByPrefRound psc100
#RepSumByPrefRound psc1000
#RepSumByPrefRound psc10000
#RepSumByPrefRound psc50000

View File

@ -30,10 +30,12 @@ import org.apache.lucene.benchmark.byTask.feeds.QueryMaker;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable; import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.TopFieldCollector; import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.search.Weight; import org.apache.lucene.search.Weight;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
@ -105,9 +107,10 @@ public abstract class ReadTask extends PerfTask {
res++; res++;
Query q = queryMaker.makeQuery(); Query q = queryMaker.makeQuery();
Sort sort = getSort(); Sort sort = getSort();
TopDocs hits; TopDocs hits = null;
final int numHits = numHits(); final int numHits = numHits();
if (numHits > 0) { if (numHits > 0) {
if (withCollector() == false) {
if (sort != null) { if (sort != null) {
Weight w = q.weight(searcher); Weight w = q.weight(searcher);
TopFieldCollector collector = TopFieldCollector.create(sort, numHits, TopFieldCollector collector = TopFieldCollector.create(sort, numHits,
@ -119,9 +122,14 @@ public abstract class ReadTask extends PerfTask {
} else { } else {
hits = searcher.search(q, numHits); hits = searcher.search(q, numHits);
} }
} else {
Collector collector = createCollector();
searcher.search(q, null, collector);
//hits = collector.topDocs();
}
final String printHitsField = getRunData().getConfig().get("print.hits.field", null); final String printHitsField = getRunData().getConfig().get("print.hits.field", null);
if (printHitsField != null && printHitsField.length() > 0) { if (hits != null && printHitsField != null && printHitsField.length() > 0) {
if (q instanceof MultiTermQuery) { if (q instanceof MultiTermQuery) {
System.out.println("MultiTermQuery term count = " + ((MultiTermQuery) q).getTotalNumberOfTerms()); System.out.println("MultiTermQuery term count = " + ((MultiTermQuery) q).getTotalNumberOfTerms());
} }
@ -177,6 +185,9 @@ public abstract class ReadTask extends PerfTask {
return res; return res;
} }
protected Collector createCollector() throws Exception {
return TopScoreDocCollector.create(numHits(), true);
}
protected Document retrieveDoc(IndexReader ir, int id) throws IOException { protected Document retrieveDoc(IndexReader ir, int id) throws IOException {
@ -193,6 +204,10 @@ public abstract class ReadTask extends PerfTask {
*/ */
public abstract boolean withSearch(); public abstract boolean withSearch();
public boolean withCollector(){
return false;
}
/** /**
* Return true if warming should be performed. * Return true if warming should be performed.

View File

@ -0,0 +1,95 @@
package org.apache.lucene.benchmark.byTask.tasks;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.feeds.QueryMaker;
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.TopScoreDocCollector;
import java.io.IOException;
/**
* Does search w/ a custom collector
*/
public class SearchWithCollectorTask extends SearchTask {
protected String clnName;
public SearchWithCollectorTask(PerfRunData runData) {
super(runData);
}
@Override
public void setup() throws Exception {
super.setup();
//check to make sure either the doc is being stored
PerfRunData runData = getRunData();
Config config = runData.getConfig();
clnName = config.get("collector.class", "");
}
@Override
public boolean withCollector() {
return true;
}
@Override
protected Collector createCollector() throws Exception {
Collector collector = null;
if (clnName.equalsIgnoreCase("topScoreDocOrdered") == true) {
collector = TopScoreDocCollector.create(numHits(), true);
} else if (clnName.equalsIgnoreCase("topScoreDocUnOrdered") == true) {
collector = TopScoreDocCollector.create(numHits(), false);
} else if (clnName.length() > 0){
collector = Class.forName(clnName).asSubclass(Collector.class).newInstance();
} else {
collector = super.createCollector();
}
return collector;
}
@Override
public QueryMaker getQueryMaker() {
return getRunData().getQueryMaker(this);
}
@Override
public boolean withRetrieve() {
return false;
}
@Override
public boolean withSearch() {
return true;
}
@Override
public boolean withTraverse() {
return false;
}
@Override
public boolean withWarm() {
return false;
}
}

View File

@ -22,19 +22,19 @@ import java.io.ByteArrayInputStream;
import java.io.IOException; import java.io.IOException;
import java.io.Reader; import java.io.Reader;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List;
import java.util.Collections; import java.util.Collections;
import java.util.HashMap; import java.util.HashMap;
import java.util.List;
import java.util.Properties; import java.util.Properties;
import java.util.StringTokenizer; import java.util.StringTokenizer;
/** /**
* Perf run configuration properties. * Perf run configuration properties.
* <p> * <p/>
* Numeric property containing ":", e.g. "10:100:5" is interpreted * Numeric property containing ":", e.g. "10:100:5" is interpreted
* as array of numeric values. It is extracted once, on first use, and * as array of numeric values. It is extracted once, on first use, and
* maintain a round number to return the appropriate value. * maintain a round number to return the appropriate value.
* <p> * <p/>
* The config property "work.dir" tells where is the root of * The config property "work.dir" tells where is the root of
* docs data dirs and indexes dirs. It is set to either of: <ul> * docs data dirs and indexes dirs. It is set to either of: <ul>
* <li>value supplied for it in the alg file;</li> * <li>value supplied for it in the alg file;</li>
@ -54,6 +54,7 @@ public class Config {
/** /**
* Read both algorithm and config properties. * Read both algorithm and config properties.
*
* @param algReader from where to read algorithm and config properties. * @param algReader from where to read algorithm and config properties.
* @throws IOException * @throws IOException
*/ */
@ -121,17 +122,38 @@ public class Config {
/** /**
* Return a string property. * Return a string property.
*
* @param name name of property. * @param name name of property.
* @param dflt default value. * @param dflt default value.
* @return a string property. * @return a string property.
*/ */
public String get(String name, String dflt) { public String get(String name, String dflt) {
return props.getProperty(name,dflt); String vals[] = (String[]) valByRound.get(name);
if (vals != null) {
return vals[roundNumber % vals.length];
}
// done if not by round
String sval = props.getProperty(name, dflt);
if (sval == null) {
return null;
}
if (sval.indexOf(":") < 0) {
return sval;
}
// first time this prop is extracted by round
int k = sval.indexOf(":");
String colName = sval.substring(0, k);
sval = sval.substring(k + 1);
colForValByRound.put(name, colName);
vals = propToStringArray(sval);
valByRound.put(name, vals);
return vals[roundNumber % vals.length];
} }
/** /**
* Set a property. * Set a property.
* Note: once a multiple values property is set, it can no longer be modified. * Note: once a multiple values property is set, it can no longer be modified.
*
* @param name name of property. * @param name name of property.
* @param value either single or multiple property value (multiple values are separated by ":") * @param value either single or multiple property value (multiple values are separated by ":")
* @throws Exception * @throws Exception
@ -148,6 +170,7 @@ public class Config {
* If the property contain ":", e.g. "10:100:5", it is interpreted * If the property contain ":", e.g. "10:100:5", it is interpreted
* as array of ints. It is extracted once, on first call * as array of ints. It is extracted once, on first call
* to get() it, and a by-round-value is returned. * to get() it, and a by-round-value is returned.
*
* @param name name of property * @param name name of property
* @param dflt default value * @param dflt default value
* @return a int property. * @return a int property.
@ -178,6 +201,7 @@ public class Config {
* If the property contain ":", e.g. "10:100:5", it is interpreted * If the property contain ":", e.g. "10:100:5", it is interpreted
* as array of doubles. It is extracted once, on first call * as array of doubles. It is extracted once, on first call
* to get() it, and a by-round-value is returned. * to get() it, and a by-round-value is returned.
*
* @param name name of property * @param name name of property
* @param dflt default value * @param dflt default value
* @return a double property. * @return a double property.
@ -208,6 +232,7 @@ public class Config {
* If the property contain ":", e.g. "true:true:false", it is interpreted * If the property contain ":", e.g. "true:true:false", it is interpreted
* as array of booleans. It is extracted once, on first call * as array of booleans. It is extracted once, on first call
* to get() it, and a by-round-value is returned. * to get() it, and a by-round-value is returned.
*
* @param name name of property * @param name name of property
* @param dflt default value * @param dflt default value
* @return a boolean property. * @return a boolean property.
@ -235,6 +260,7 @@ public class Config {
/** /**
* Increment the round number, for config values that are extracted by round number. * Increment the round number, for config values that are extracted by round number.
*
* @return the new round number. * @return the new round number.
*/ */
public int newRound() { public int newRound() {
@ -257,8 +283,12 @@ public class Config {
int n1 = (roundNumber - 1) % ad.length; int n1 = (roundNumber - 1) % ad.length;
int n2 = roundNumber % ad.length; int n2 = roundNumber % ad.length;
sb.append(" ").append(name).append(":").append(ad[n1]).append("-->").append(ad[n2]); sb.append(" ").append(name).append(":").append(ad[n1]).append("-->").append(ad[n2]);
} } else if (a instanceof String[]) {
else { String ad[] = (String[]) a;
int n1 = (roundNumber - 1) % ad.length;
int n2 = roundNumber % ad.length;
sb.append(" ").append(name).append(":").append(ad[n1]).append("-->").append(ad[n2]);
} else {
boolean ab[] = (boolean[]) a; boolean ab[] = (boolean[]) a;
int n1 = (roundNumber - 1) % ab.length; int n1 = (roundNumber - 1) % ab.length;
int n2 = roundNumber % ab.length; int n2 = roundNumber % ab.length;
@ -274,6 +304,20 @@ public class Config {
return roundNumber; return roundNumber;
} }
private String[] propToStringArray(String s) {
if (s.indexOf(":") < 0) {
return new String[]{s};
}
ArrayList<String> a = new ArrayList<String>();
StringTokenizer st = new StringTokenizer(s, ":");
while (st.hasMoreTokens()) {
String t = st.nextToken();
a.add(t);
}
return (String[]) a.toArray(new String[a.size()]);
}
// extract properties to array, e.g. for "10:100:5" return int[]{10,100,5}. // extract properties to array, e.g. for "10:100:5" return int[]{10,100,5}.
private int[] propToIntArray(String s) { private int[] propToIntArray(String s) {
if (s.indexOf(":") < 0) { if (s.indexOf(":") < 0) {
@ -367,13 +411,15 @@ public class Config {
int ai[] = (int[]) a; int ai[] = (int[]) a;
int n = roundNum % ai.length; int n = roundNum % ai.length;
sb.append(Format.format(ai[n], template)); sb.append(Format.format(ai[n], template));
} } else if (a instanceof double[]) {
else if (a instanceof double[]) {
double ad[] = (double[]) a; double ad[] = (double[]) a;
int n = roundNum % ad.length; int n = roundNum % ad.length;
sb.append(Format.format(2, ad[n], template)); sb.append(Format.format(2, ad[n], template));
} } else if (a instanceof String[]) {
else { String ad[] = (String[]) a;
int n = roundNum % ad.length;
sb.append(ad[n]);
} else {
boolean ab[] = (boolean[]) a; boolean ab[] = (boolean[]) a;
int n = roundNum % ab.length; int n = roundNum % ab.length;
sb.append(Format.formatPaddLeft("" + ab[n], template)); sb.append(Format.formatPaddLeft("" + ab[n], template));