LUCENE-2343: add support for benchmarking collectors

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@927178 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Grant Ingersoll 2010-03-24 20:49:44 +00:00
parent 03216a150e
commit eb6e13fe9e
6 changed files with 463 additions and 122 deletions

View File

@ -2,7 +2,10 @@ Lucene Benchmark Contrib Change Log
The Benchmark contrib package contains code for benchmarking Lucene in a variety of ways.
2/21/2020
3/24/2010
LUCENE-2343: Added support for benchmarking collectors. (Grant Ingersoll, Shai Erera)
2/21/2010
LUCENE-2254: Add support to the quality package for running
experiments with any combination of Title, Description, and Narrative.
(Robert Muir)

View File

@ -0,0 +1,91 @@
#/**
# * Licensed to the Apache Software Foundation (ASF) under one or more
# * contributor license agreements. See the NOTICE file distributed with
# * this work for additional information regarding copyright ownership.
# * The ASF licenses this file to You under the Apache License, Version 2.0
# * (the "License"); you may not use this file except in compliance with
# * the License. You may obtain a copy of the License at
# *
# * http://www.apache.org/licenses/LICENSE-2.0
# *
# * Unless required by applicable law or agreed to in writing, software
# * distributed under the License is distributed on an "AS IS" BASIS,
# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# * See the License for the specific language governing permissions and
# * limitations under the License.
# */
# -------------------------------------------------------------------------------------
# multi val params are iterated by NewRound's, added to reports, start with column name.
# collector.class can be:
# Fully Qualified Class Name of a Collector with an empty constructor
# topScoreDocOrdered - Creates a TopScoreDocCollector that requires in order docs
# topScoreDocUnordered - Like above, but allows out of order
collector.class=coll:topScoreDocOrdered:topScoreDocUnordered:topScoreDocOrdered:topScoreDocUnordered
analyzer=org.apache.lucene.analysis.WhitespaceAnalyzer
directory=FSDirectory
#directory=RamDirectory
doc.stored=true
doc.tokenized=true
doc.term.vector=false
log.step=100000
search.num.hits=100000
content.source=org.apache.lucene.benchmark.byTask.feeds.LongToEnglishContentSource
query.maker=org.apache.lucene.benchmark.byTask.feeds.LongToEnglishQueryMaker
# task at this depth or less would print when they start
task.max.depth.log=2
log.queries=true
# -------------------------------------------------------------------------------------
{ "Rounds"
ResetSystemErase
{ "Populate"
CreateIndex
{ "MAddDocs" AddDoc } : 200000
Optimize
CloseIndex
}
OpenReader
{ "topDocs" SearchWithCollector > : 10
CloseReader
# OpenReader
#uses an array of search.num.hits size, but can also take in a parameter
# { "psc" SearchWithPostSortCollector > : 10
# { "psc100" SearchWithPostSortCollector(100) > : 10
# { "psc1000" SearchWithPostSortCollector(1000) > : 10
# { "psc10000" SearchWithPostSortCollector(10000) > : 10
# { "psc50000" SearchWithPostSortCollector(50000) > : 10
# CloseReader
RepSumByPref topDocs
# RepSumByPref psc
# RepSumByPref psc100
# RepSumByPref psc1000
# RepSumByPref psc10000
# RepSumByPref psc50000
NewRound
} : 4
#RepSumByNameRound
#RepSumByName
#RepSumByPrefRound topDocs
#RepSumByPrefRound psc
#RepSumByPrefRound psc100
#RepSumByPrefRound psc1000
#RepSumByPrefRound psc10000
#RepSumByPrefRound psc50000

View File

@ -0,0 +1,91 @@
#/**
# * Licensed to the Apache Software Foundation (ASF) under one or more
# * contributor license agreements. See the NOTICE file distributed with
# * this work for additional information regarding copyright ownership.
# * The ASF licenses this file to You under the Apache License, Version 2.0
# * (the "License"); you may not use this file except in compliance with
# * the License. You may obtain a copy of the License at
# *
# * http://www.apache.org/licenses/LICENSE-2.0
# *
# * Unless required by applicable law or agreed to in writing, software
# * distributed under the License is distributed on an "AS IS" BASIS,
# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# * See the License for the specific language governing permissions and
# * limitations under the License.
# */
# -------------------------------------------------------------------------------------
# multi val params are iterated by NewRound's, added to reports, start with column name.
# collector.class can be:
# Fully Qualified Class Name of a Collector with an empty constructor
# topScoreDocOrdered - Creates a TopScoreDocCollector that requires in order docs
# topScoreDocUnordered - Like above, but allows out of order
collector.class=coll:topScoreDocOrdered:topScoreDocUnordered:topScoreDocOrdered:topScoreDocUnordered
analyzer=org.apache.lucene.analysis.WhitespaceAnalyzer
directory=FSDirectory
#directory=RamDirectory
doc.stored=true
doc.tokenized=true
doc.term.vector=false
log.step=100000
search.num.hits=1000000
content.source=org.apache.lucene.benchmark.byTask.feeds.LongToEnglishContentSource
query.maker=org.apache.lucene.benchmark.byTask.feeds.LongToEnglishQueryMaker
# task at this depth or less would print when they start
task.max.depth.log=2
log.queries=true
# -------------------------------------------------------------------------------------
{ "Rounds"
ResetSystemErase
{ "Populate"
CreateIndex
{ "MAddDocs" AddDoc } : 2000000
Optimize
CloseIndex
}
OpenReader
{ "topDocs" SearchWithCollector > : 10
CloseReader
# OpenReader
#uses an array of search.num.hits size, but can also take in a parameter
# { "psc" SearchWithPostSortCollector > : 10
# { "psc100" SearchWithPostSortCollector(100) > : 10
# { "psc1000" SearchWithPostSortCollector(1000) > : 10
# { "psc10000" SearchWithPostSortCollector(10000) > : 10
# { "psc50000" SearchWithPostSortCollector(50000) > : 10
# CloseReader
RepSumByPref topDocs
# RepSumByPref psc
# RepSumByPref psc100
# RepSumByPref psc1000
# RepSumByPref psc10000
# RepSumByPref psc50000
NewRound
} : 4
#RepSumByNameRound
#RepSumByName
#RepSumByPrefRound topDocs
#RepSumByPrefRound psc
#RepSumByPrefRound psc100
#RepSumByPrefRound psc1000
#RepSumByPrefRound psc10000
#RepSumByPrefRound psc50000

View File

@ -30,10 +30,12 @@ import org.apache.lucene.benchmark.byTask.feeds.QueryMaker;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
@ -105,9 +107,10 @@ public abstract class ReadTask extends PerfTask {
res++;
Query q = queryMaker.makeQuery();
Sort sort = getSort();
TopDocs hits;
TopDocs hits = null;
final int numHits = numHits();
if (numHits > 0) {
if (withCollector() == false) {
if (sort != null) {
Weight w = q.weight(searcher);
TopFieldCollector collector = TopFieldCollector.create(sort, numHits,
@ -119,9 +122,14 @@ public abstract class ReadTask extends PerfTask {
} else {
hits = searcher.search(q, numHits);
}
} else {
Collector collector = createCollector();
searcher.search(q, null, collector);
//hits = collector.topDocs();
}
final String printHitsField = getRunData().getConfig().get("print.hits.field", null);
if (printHitsField != null && printHitsField.length() > 0) {
if (hits != null && printHitsField != null && printHitsField.length() > 0) {
if (q instanceof MultiTermQuery) {
System.out.println("MultiTermQuery term count = " + ((MultiTermQuery) q).getTotalNumberOfTerms());
}
@ -177,6 +185,9 @@ public abstract class ReadTask extends PerfTask {
return res;
}
/**
 * Create the collector that is handed to the searcher when {@link #withCollector()}
 * returns true. This default builds a {@link TopScoreDocCollector} sized by
 * {@link #numHits()}; subclasses may override to supply a different collector.
 *
 * @return the collector to search with.
 * @throws Exception declared so that overriding implementations may fail while creating a collector.
 */
protected Collector createCollector() throws Exception {
  final int hitCount = numHits();
  // 'true' is the same flag value the "topScoreDocOrdered" option passes.
  final Collector topScoreCollector = TopScoreDocCollector.create(hitCount, true);
  return topScoreCollector;
}
protected Document retrieveDoc(IndexReader ir, int id) throws IOException {
@ -193,6 +204,10 @@ public abstract class ReadTask extends PerfTask {
*/
public abstract boolean withSearch();
/**
 * Return true if the search should be executed through the collector
 * obtained from {@link #createCollector()}.
 * This base implementation returns false.
 */
public boolean withCollector(){
return false;
}
/**
* Return true if warming should be performed.

View File

@ -0,0 +1,95 @@
package org.apache.lucene.benchmark.byTask.tasks;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.feeds.QueryMaker;
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.TopScoreDocCollector;
import java.io.IOException;
/**
 * Search task that runs its queries through a {@link Collector}.
 * <p>
 * The collector is chosen by the <code>collector.class</code> config property:
 * <code>topScoreDocOrdered</code>, <code>topScoreDocUnOrdered</code> (both matched
 * ignoring case), or the fully qualified name of a {@link Collector} class that
 * can be instantiated without arguments. When the property is empty the
 * collector of the parent task is used.
 */
public class SearchWithCollectorTask extends SearchTask {

  /** Value of the <code>collector.class</code> property, read in {@link #setup()}. */
  protected String clnName;

  public SearchWithCollectorTask(PerfRunData runData) {
    super(runData);
  }

  /** Reads the collector selection from the config after the parent setup has run. */
  @Override
  public void setup() throws Exception {
    super.setup();
    clnName = getRunData().getConfig().get("collector.class", "");
  }

  /** This task always searches with a collector. */
  @Override
  public boolean withCollector() {
    return true;
  }

  /**
   * Builds the collector named by {@link #clnName}.
   *
   * @return the collector to search with.
   * @throws Exception if the named collector class cannot be loaded or instantiated.
   */
  @Override
  protected Collector createCollector() throws Exception {
    if (clnName.equalsIgnoreCase("topScoreDocOrdered")) {
      return TopScoreDocCollector.create(numHits(), true);
    }
    if (clnName.equalsIgnoreCase("topScoreDocUnOrdered")) {
      return TopScoreDocCollector.create(numHits(), false);
    }
    if (clnName.length() == 0) {
      // nothing configured: fall back to the parent's collector
      return super.createCollector();
    }
    // anything else is taken to be a Collector class name
    Class<? extends Collector> collectorClass = Class.forName(clnName).asSubclass(Collector.class);
    return collectorClass.newInstance();
  }

  @Override
  public QueryMaker getQueryMaker() {
    PerfRunData data = getRunData();
    return data.getQueryMaker(this);
  }

  /** Documents are not retrieved by this task. */
  @Override
  public boolean withRetrieve() {
    return false;
  }

  /** Searching is the purpose of this task. */
  @Override
  public boolean withSearch() {
    return true;
  }

  /** Hits are not traversed by this task. */
  @Override
  public boolean withTraverse() {
    return false;
  }

  /** No warming is done by this task. */
  @Override
  public boolean withWarm() {
    return false;
  }
}

View File

@ -22,19 +22,19 @@ import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Properties;
import java.util.StringTokenizer;
/**
* Perf run configuration properties.
* <p>
* <p/>
* Numeric property containing ":", e.g. "10:100:5" is interpreted
* as array of numeric values. It is extracted once, on first use, and a round
* number is maintained to return the appropriate value.
* <p>
* <p/>
* The config property "work.dir" tells where is the root of
* docs data dirs and indexes dirs. It is set to either of: <ul>
* <li>value supplied for it in the alg file;</li>
@ -54,6 +54,7 @@ public class Config {
/**
* Read both algorithm and config properties.
*
* @param algReader from where to read algorithm and config properties.
* @throws IOException
*/
@ -121,17 +122,38 @@ public class Config {
/**
 * Return a string property.
 * <p/>
 * If the property value contains ":", e.g. "coll:first:second", it is interpreted
 * as a by-round property: the text before the first ":" is the column name used
 * in reports and the remaining ":" separated tokens are the values. It is
 * extracted once, on first call, and the value for the current round is returned.
 * <p/>
 * Values containing ":\" or ":/" (absolute Windows paths such as "C:\work", or
 * URLs) are never split and are returned unchanged, as are values for which no
 * by-round tokens can be extracted.
 *
 * @param name name of property.
 * @param dflt default value.
 * @return a string property, or null if the property is not set and dflt is null.
 */
public String get(String name, String dflt) {
  // already extracted by round on an earlier call?
  String[] cached = (String[]) valByRound.get(name);
  if (cached != null) {
    return cached[roundNumber % cached.length];
  }
  // done if not by round
  String sval = props.getProperty(name, dflt);
  if (sval == null) {
    return null;
  }
  int k = sval.indexOf(':');
  if (k < 0) {
    return sval;
  }
  // Splitting here would mangle absolute path names on Windows and URLs;
  // this assumes no real by-round value starts with '\' or '/'.
  if (sval.indexOf(":\\") >= 0 || sval.indexOf(":/") >= 0) {
    return sval;
  }
  // first time this prop is extracted by round
  String[] vals = propToStringArray(sval.substring(k + 1));
  if (vals.length == 0) {
    // only separators after the column name: nothing to iterate over,
    // and indexing by round would divide by zero
    return sval;
  }
  colForValByRound.put(name, sval.substring(0, k));
  valByRound.put(name, vals);
  return vals[roundNumber % vals.length];
}
/**
* Set a property.
* Note: once a multiple values property is set, it can no longer be modified.
*
* @param name name of property.
* @param value either single or multiple property value (multiple values are separated by ":")
* @throws Exception
@ -148,6 +170,7 @@ public class Config {
* If the property contain ":", e.g. "10:100:5", it is interpreted
* as array of ints. It is extracted once, on first call
* to get() it, and a by-round-value is returned.
*
* @param name name of property
* @param dflt default value
* @return a int property.
@ -178,6 +201,7 @@ public class Config {
* If the property contain ":", e.g. "10:100:5", it is interpreted
* as array of doubles. It is extracted once, on first call
* to get() it, and a by-round-value is returned.
*
* @param name name of property
* @param dflt default value
* @return a double property.
@ -208,6 +232,7 @@ public class Config {
* If the property contains ":", e.g. "true:true:false", it is interpreted
* as array of booleans. It is extracted once, on first call
* to get() it, and a by-round-value is returned.
*
* @param name name of property
* @param dflt default value
* @return a boolean property.
@ -235,6 +260,7 @@ public class Config {
/**
* Increment the round number, for config values that are extracted by round number.
*
* @return the new round number.
*/
public int newRound() {
@ -257,8 +283,12 @@ public class Config {
int n1 = (roundNumber - 1) % ad.length;
int n2 = roundNumber % ad.length;
sb.append(" ").append(name).append(":").append(ad[n1]).append("-->").append(ad[n2]);
}
else {
} else if (a instanceof String[]) {
String ad[] = (String[]) a;
int n1 = (roundNumber - 1) % ad.length;
int n2 = roundNumber % ad.length;
sb.append(" ").append(name).append(":").append(ad[n1]).append("-->").append(ad[n2]);
} else {
boolean ab[] = (boolean[]) a;
int n1 = (roundNumber - 1) % ab.length;
int n2 = roundNumber % ab.length;
@ -274,6 +304,20 @@ public class Config {
return roundNumber;
}
// extract properties to array, e.g. for "a:bb:c" return String[]{"a","bb","c"}.
// Empty tokens are dropped by the tokenizer, so "a::b" gives {"a","b"}.
private String[] propToStringArray(String s) {
  if (!s.contains(":")) {
    // single value, nothing to split
    return new String[] {s};
  }
  StringTokenizer tokenizer = new StringTokenizer(s, ":");
  String[] tokens = new String[tokenizer.countTokens()];
  for (int i = 0; i < tokens.length; i++) {
    tokens[i] = tokenizer.nextToken();
  }
  return tokens;
}
// extract properties to array, e.g. for "10:100:5" return int[]{10,100,5}.
private int[] propToIntArray(String s) {
if (s.indexOf(":") < 0) {
@ -367,13 +411,15 @@ public class Config {
int ai[] = (int[]) a;
int n = roundNum % ai.length;
sb.append(Format.format(ai[n], template));
}
else if (a instanceof double[]) {
} else if (a instanceof double[]) {
double ad[] = (double[]) a;
int n = roundNum % ad.length;
sb.append(Format.format(2, ad[n], template));
}
else {
} else if (a instanceof String[]) {
String ad[] = (String[]) a;
int n = roundNum % ad.length;
sb.append(ad[n]);
} else {
boolean ab[] = (boolean[]) a;
int n = roundNum % ab.length;
sb.append(Format.formatPaddLeft("" + ab[n], template));