mirror of https://github.com/apache/lucene.git
LUCENE-1116: contrib/benchmark quality package improvements (MRR, Trec1MQ)
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@608370 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
40eb1cd53f
commit
9e65cd554f
|
@ -4,6 +4,13 @@ The Benchmark contrib package contains code for benchmarking Lucene in a variety
|
||||||
|
|
||||||
$Id:$
|
$Id:$
|
||||||
|
|
||||||
|
01/03/08
|
||||||
|
LUCENE-1116: quality package improvements:
|
||||||
|
- add MRR computation;
|
||||||
|
- allow control of max #queries to run;
|
||||||
|
- verify log & report are flushed.
|
||||||
|
- add TREC query reader for the 1MQ track.
|
||||||
|
|
||||||
12/31/07
|
12/31/07
|
||||||
LUCENE-1102: EnwikiDocMaker now indexes the docid field, so results might not be comparable with results prior to this change, although
|
LUCENE-1102: EnwikiDocMaker now indexes the docid field, so results might not be comparable with results prior to this change, although
|
||||||
it is doubted that this one small field makes much difference.
|
it is doubted that this one small field makes much difference.
|
||||||
|
|
|
@ -52,6 +52,12 @@ public class QualityBenchmark {
|
||||||
/** index field to extract doc name for each search result; used for judging the results. */
|
/** index field to extract doc name for each search result; used for judging the results. */
|
||||||
protected String docNameField;
|
protected String docNameField;
|
||||||
|
|
||||||
|
/** maximal number of queries that this quality benchmark runs. Default: maxint. Useful for debugging. */
|
||||||
|
private int maxQueries = Integer.MAX_VALUE;
|
||||||
|
|
||||||
|
/** maximal number of results to collect for each query. Default: 1000. */
|
||||||
|
private int maxResults = 1000;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a QualityBenchmark.
|
* Create a QualityBenchmark.
|
||||||
* @param qqs quality queries to run.
|
* @param qqs quality queries to run.
|
||||||
|
@ -71,7 +77,6 @@ public class QualityBenchmark {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Run the quality benchmark.
|
* Run the quality benchmark.
|
||||||
* @param maxResults how many results to collect for each quality query.
|
|
||||||
* @param judge the judge that can tell if a certain result doc is relevant for a certain quality query.
|
* @param judge the judge that can tell if a certain result doc is relevant for a certain quality query.
|
||||||
* If null, no judgements would be made. Usually null for a submission run.
|
* If null, no judgements would be made. Usually null for a submission run.
|
||||||
* @param submitRep submission report is created if non null.
|
* @param submitRep submission report is created if non null.
|
||||||
|
@ -79,10 +84,11 @@ public class QualityBenchmark {
|
||||||
* @return QualityStats of each quality query that was executed.
|
* @return QualityStats of each quality query that was executed.
|
||||||
* @throws Exception if quality benchmark failed to run.
|
* @throws Exception if quality benchmark failed to run.
|
||||||
*/
|
*/
|
||||||
public QualityStats [] execute(int maxResults, Judge judge, SubmissionReport submitRep,
|
public QualityStats [] execute(Judge judge, SubmissionReport submitRep,
|
||||||
PrintWriter qualityLog) throws Exception {
|
PrintWriter qualityLog) throws Exception {
|
||||||
QualityStats stats[] = new QualityStats[qualityQueries.length];
|
int nQueries = Math.min(maxQueries, qualityQueries.length);
|
||||||
for (int i=0; i<qualityQueries.length; i++) {
|
QualityStats stats[] = new QualityStats[nQueries];
|
||||||
|
for (int i=0; i<nQueries; i++) {
|
||||||
QualityQuery qq = qualityQueries[i];
|
QualityQuery qq = qualityQueries[i];
|
||||||
// generate query
|
// generate query
|
||||||
Query q = qqParser.parse(qq);
|
Query q = qqParser.parse(qq);
|
||||||
|
@ -98,6 +104,9 @@ public class QualityBenchmark {
|
||||||
submitRep.report(qq,td,docNameField,searcher);
|
submitRep.report(qq,td,docNameField,searcher);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (submitRep!=null) {
|
||||||
|
submitRep.flush();
|
||||||
|
}
|
||||||
return stats;
|
return stats;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -121,4 +130,32 @@ public class QualityBenchmark {
|
||||||
return stts;
|
return stts;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return the maximum number of quality queries to run. Useful for debugging.
|
||||||
|
*/
|
||||||
|
public int getMaxQueries() {
|
||||||
|
return maxQueries;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set the maximum number of quality queries to run. Useful for debugging.
|
||||||
|
*/
|
||||||
|
public void setMaxQueries(int maxQueries) {
|
||||||
|
this.maxQueries = maxQueries;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return the maximum number of results to collect for each quality query.
|
||||||
|
*/
|
||||||
|
public int getMaxResults() {
|
||||||
|
return maxResults;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set the maximum number of results to collect for each quality query.
|
||||||
|
*/
|
||||||
|
public void setMaxResults(int maxResults) {
|
||||||
|
this.maxResults = maxResults;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -34,6 +34,7 @@ public class QualityStats {
|
||||||
private double pReleventSum = 0;
|
private double pReleventSum = 0;
|
||||||
private double numPoints = 0;
|
private double numPoints = 0;
|
||||||
private double numGoodPoints = 0;
|
private double numGoodPoints = 0;
|
||||||
|
private double mrr = 0;
|
||||||
private long searchTime;
|
private long searchTime;
|
||||||
private long docNamesExtractTime;
|
private long docNamesExtractTime;
|
||||||
|
|
||||||
|
@ -82,6 +83,9 @@ public class QualityStats {
|
||||||
if (isRelevant) {
|
if (isRelevant) {
|
||||||
numGoodPoints+=1;
|
numGoodPoints+=1;
|
||||||
recallPoints.add(new RecallPoint(n,numGoodPoints));
|
recallPoints.add(new RecallPoint(n,numGoodPoints));
|
||||||
|
if (recallPoints.size()==1 && n<=5) { // first point, but only within 5 top scores.
|
||||||
|
mrr = 1.0 / n;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
numPoints = n;
|
numPoints = n;
|
||||||
double p = numGoodPoints / numPoints;
|
double p = numGoodPoints / numPoints;
|
||||||
|
@ -111,7 +115,7 @@ public class QualityStats {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Return the average precision at recall points: sum of precision at recall points / maxGoodPoints.
|
* Return the average precision at recall points.
|
||||||
*/
|
*/
|
||||||
public double getAvp() {
|
public double getAvp() {
|
||||||
return maxGoodPoints==0 ? 0 : pReleventSum/maxGoodPoints;
|
return maxGoodPoints==0 ? 0 : pReleventSum/maxGoodPoints;
|
||||||
|
@ -154,6 +158,8 @@ public class QualityStats {
|
||||||
fracFormat(nf.format(maxGoodPoints)));
|
fracFormat(nf.format(maxGoodPoints)));
|
||||||
logger.println(prefix+format("Average Precision: ",M)+
|
logger.println(prefix+format("Average Precision: ",M)+
|
||||||
fracFormat(nf.format(getAvp())));
|
fracFormat(nf.format(getAvp())));
|
||||||
|
logger.println(prefix+format("MRR: ",M)+
|
||||||
|
fracFormat(nf.format(getMRR())));
|
||||||
logger.println(prefix+format("Recall: ",M)+
|
logger.println(prefix+format("Recall: ",M)+
|
||||||
fracFormat(nf.format(getRecall())));
|
fracFormat(nf.format(getRecall())));
|
||||||
for (int i=1; i<(int)numPoints && i<pAt.length; i++) {
|
for (int i=1; i<(int)numPoints && i<pAt.length; i++) {
|
||||||
|
@ -186,6 +192,10 @@ public class QualityStats {
|
||||||
*/
|
*/
|
||||||
public static QualityStats average(QualityStats[] stats) {
|
public static QualityStats average(QualityStats[] stats) {
|
||||||
QualityStats avg = new QualityStats(0,0);
|
QualityStats avg = new QualityStats(0,0);
|
||||||
|
if (stats.length==0) {
|
||||||
|
// weird, no stats to average!
|
||||||
|
return avg;
|
||||||
|
}
|
||||||
int m = 0; // queries with positive judgements
|
int m = 0; // queries with positive judgements
|
||||||
// aggregate
|
// aggregate
|
||||||
for (int i=0; i<stats.length; i++) {
|
for (int i=0; i<stats.length; i++) {
|
||||||
|
@ -197,6 +207,7 @@ public class QualityStats {
|
||||||
avg.numPoints += stats[i].numPoints;
|
avg.numPoints += stats[i].numPoints;
|
||||||
avg.pReleventSum += stats[i].getAvp();
|
avg.pReleventSum += stats[i].getAvp();
|
||||||
avg.recall += stats[i].recall;
|
avg.recall += stats[i].recall;
|
||||||
|
avg.mrr += stats[i].getMRR();
|
||||||
avg.maxGoodPoints += stats[i].maxGoodPoints;
|
avg.maxGoodPoints += stats[i].maxGoodPoints;
|
||||||
for (int j=1; j<avg.pAt.length; j++) {
|
for (int j=1; j<avg.pAt.length; j++) {
|
||||||
avg.pAt[j] += stats[i].getPrecisionAt(j);
|
avg.pAt[j] += stats[i].getPrecisionAt(j);
|
||||||
|
@ -210,6 +221,7 @@ public class QualityStats {
|
||||||
avg.numGoodPoints /= m;
|
avg.numGoodPoints /= m;
|
||||||
avg.numPoints /= m;
|
avg.numPoints /= m;
|
||||||
avg.recall /= m;
|
avg.recall /= m;
|
||||||
|
avg.mrr /= m;
|
||||||
avg.maxGoodPoints /= m;
|
avg.maxGoodPoints /= m;
|
||||||
for (int j=1; j<avg.pAt.length; j++) {
|
for (int j=1; j<avg.pAt.length; j++) {
|
||||||
avg.pAt[j] /= m;
|
avg.pAt[j] /= m;
|
||||||
|
@ -256,6 +268,22 @@ public class QualityStats {
|
||||||
return (RecallPoint[]) recallPoints.toArray(new RecallPoint[0]);
|
return (RecallPoint[]) recallPoints.toArray(new RecallPoint[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the Mean reciprocal rank over the queries or RR for a single query.
|
||||||
|
* <p>
|
||||||
|
* Reciprocal rank is defined as <code>1/r</code> where <code>r</code> is the
|
||||||
|
* rank of the first correct result, or <code>0</code> if there are no correct
|
||||||
|
* results within the top 5 results.
|
||||||
|
* <p>
|
||||||
|
* This follows the definition in
|
||||||
|
* <a href="http://www.cnlp.org/publications/02cnlptrec10.pdf">
|
||||||
|
* Question Answering - CNLP at the TREC-10 Question Answering Track</a>.
|
||||||
|
*/
|
||||||
|
public double getMRR() {
|
||||||
|
return mrr;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the search time in milliseconds for the measured query.
|
* Returns the search time in milliseconds for the measured query.
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -46,8 +46,9 @@ public class QueryDriver {
|
||||||
|
|
||||||
// run the benchmark
|
// run the benchmark
|
||||||
QualityBenchmark qrun = new QualityBenchmark(qqs, qqParser, searcher, docNameField);
|
QualityBenchmark qrun = new QualityBenchmark(qqs, qqParser, searcher, docNameField);
|
||||||
|
qrun.setMaxResults(maxResults);
|
||||||
SubmissionReport submitLog = null;
|
SubmissionReport submitLog = null;
|
||||||
QualityStats stats[] = qrun.execute(maxResults, judge, submitLog, logger);
|
QualityStats stats[] = qrun.execute(judge, submitLog, logger);
|
||||||
|
|
||||||
// print an average sum of the results
|
// print an average sum of the results
|
||||||
QualityStats avg = QualityStats.average(stats);
|
QualityStats avg = QualityStats.average(stats);
|
||||||
|
|
|
@ -0,0 +1,87 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.lucene.benchmark.quality.trec;
|
||||||
|
|
||||||
|
import java.io.BufferedReader;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.HashMap;
|
||||||
|
|
||||||
|
import org.apache.lucene.benchmark.quality.QualityQuery;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Read topics of TREC 1MQ track.
|
||||||
|
* <p>
|
||||||
|
* Expects this topic format -
|
||||||
|
* <pre>
|
||||||
|
* qnum:qtext
|
||||||
|
* </pre>
|
||||||
|
* Comment lines starting with '#' are ignored.
|
||||||
|
* <p>
|
||||||
|
* All topics will have a single name value pair.
|
||||||
|
*/
|
||||||
|
public class Trec1MQReader {
|
||||||
|
|
||||||
|
private String name;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructor for Trec's 1MQ TopicsReader
|
||||||
|
* @param name name of name-value pair to set for all queries.
|
||||||
|
*/
|
||||||
|
public Trec1MQReader(String name) {
|
||||||
|
super();
|
||||||
|
this.name = name;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Read quality queries from trec 1MQ format topics file.
|
||||||
|
* @param reader where queries are read from.
|
||||||
|
* @return the result quality queries.
|
||||||
|
* @throws IOException if cannot read the queries.
|
||||||
|
*/
|
||||||
|
public QualityQuery[] readQueries(BufferedReader reader) throws IOException {
|
||||||
|
ArrayList res = new ArrayList();
|
||||||
|
String line;
|
||||||
|
try {
|
||||||
|
while (null!=(line=reader.readLine())) {
|
||||||
|
line = line.trim();
|
||||||
|
if (line.startsWith("#")) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// id
|
||||||
|
int k = line.indexOf(":");
|
||||||
|
String id = line.substring(0,k).trim();
|
||||||
|
// qtext
|
||||||
|
String qtext = line.substring(k+1).trim();
|
||||||
|
// we got a topic!
|
||||||
|
HashMap fields = new HashMap();
|
||||||
|
fields.put(name,qtext);
|
||||||
|
//System.out.println("id: "+id+" qtext: "+qtext+" line: "+line);
|
||||||
|
QualityQuery topic = new QualityQuery(id,fields);
|
||||||
|
res.add(topic);
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
reader.close();
|
||||||
|
}
|
||||||
|
// sort result array (by ID)
|
||||||
|
QualityQuery qq[] = (QualityQuery[]) res.toArray(new QualityQuery[0]);
|
||||||
|
Arrays.sort(qq);
|
||||||
|
return qq;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -35,13 +35,16 @@ public class SubmissionReport {
|
||||||
|
|
||||||
private NumberFormat nf;
|
private NumberFormat nf;
|
||||||
private PrintWriter logger;
|
private PrintWriter logger;
|
||||||
|
private String name;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Constructor for SubmissionReport.
|
* Constructor for SubmissionReport.
|
||||||
* @param logger if null, no submission data is created.
|
* @param logger if null, no submission data is created.
|
||||||
|
* @param name name of this run.
|
||||||
*/
|
*/
|
||||||
public SubmissionReport (PrintWriter logger) {
|
public SubmissionReport (PrintWriter logger, String name) {
|
||||||
this.logger = logger;
|
this.logger = logger;
|
||||||
|
this.name = name;
|
||||||
nf = NumberFormat.getInstance();
|
nf = NumberFormat.getInstance();
|
||||||
nf.setMaximumFractionDigits(4);
|
nf.setMaximumFractionDigits(4);
|
||||||
nf.setMinimumFractionDigits(4);
|
nf.setMinimumFractionDigits(4);
|
||||||
|
@ -66,14 +69,21 @@ public class SubmissionReport {
|
||||||
String docName = xt.docName(searcher,sd[i].doc);
|
String docName = xt.docName(searcher,sd[i].doc);
|
||||||
logger.println(
|
logger.println(
|
||||||
qq.getQueryID() + sep +
|
qq.getQueryID() + sep +
|
||||||
'0' + sep +
|
"Q0" + sep +
|
||||||
format(docName,20) + sep +
|
format(docName,20) + sep +
|
||||||
format(""+i,7) + sep +
|
format(""+i,7) + sep +
|
||||||
nf.format(sd[i].score)
|
nf.format(sd[i].score) + sep +
|
||||||
|
name
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void flush() {
|
||||||
|
if (logger!=null) {
|
||||||
|
logger.flush();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private static String padd = " ";
|
private static String padd = " ";
|
||||||
private String format(String s, int minLen) {
|
private String format(String s, int minLen) {
|
||||||
s = (s==null ? "" : s);
|
s = (s==null ? "" : s);
|
||||||
|
|
|
@ -87,8 +87,9 @@ public class TestQualityRun extends TestCase {
|
||||||
QualityQueryParser qqParser = new SimpleQQParser("title","body");
|
QualityQueryParser qqParser = new SimpleQQParser("title","body");
|
||||||
QualityBenchmark qrun = new QualityBenchmark(qqs, qqParser, searcher, docNameField);
|
QualityBenchmark qrun = new QualityBenchmark(qqs, qqParser, searcher, docNameField);
|
||||||
|
|
||||||
SubmissionReport submitLog = DEBUG ? new SubmissionReport(logger) : null;
|
SubmissionReport submitLog = DEBUG ? new SubmissionReport(logger, "TestRun") : null;
|
||||||
QualityStats stats[] = qrun.execute(maxResults, judge, submitLog, logger);
|
qrun.setMaxResults(maxResults);
|
||||||
|
QualityStats stats[] = qrun.execute(judge, submitLog, logger);
|
||||||
|
|
||||||
// --------- verify by the way judgments were altered for this test:
|
// --------- verify by the way judgments were altered for this test:
|
||||||
// for some queries, depending on m = qnum % 8
|
// for some queries, depending on m = qnum % 8
|
||||||
|
|
Loading…
Reference in New Issue