LUCENE-1116: contrib/benchmark quality package improvements (MRR, Trec1MQ)

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@608370 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Doron Cohen 2008-01-03 07:44:40 +00:00
parent 40eb1cd53f
commit 9e65cd554f
7 changed files with 182 additions and 11 deletions

View File

@ -4,6 +4,13 @@ The Benchmark contrib package contains code for benchmarking Lucene in a variety
$Id:$ $Id:$
01/03/08
LUCENE-1116: quality package improvements:
- add MRR computation;
- allow control of max #queries to run;
- verify log & report are flushed.
- add TREC query reader for the 1MQ track.
12/31/07 12/31/07
LUCENE-1102: EnwikiDocMaker now indexes the docid field, so results might not be comparable with results prior to this change, although LUCENE-1102: EnwikiDocMaker now indexes the docid field, so results might not be comparable with results prior to this change, although
it is doubted that this one small field makes much difference. it is doubted that this one small field makes much difference.

View File

@ -52,6 +52,12 @@ public class QualityBenchmark {
/** index field to extract doc name for each search result; used for judging the results. */ /** index field to extract doc name for each search result; used for judging the results. */
protected String docNameField; protected String docNameField;
/** maximal number of queries that this quality benchmark runs. Default: maxint. Useful for debugging. */
private int maxQueries = Integer.MAX_VALUE;
/** maximal number of results to collect for each query. Default: 1000. */
private int maxResults = 1000;
/** /**
* Create a QualityBenchmark. * Create a QualityBenchmark.
* @param qqs quality queries to run. * @param qqs quality queries to run.
@ -71,7 +77,6 @@ public class QualityBenchmark {
/** /**
* Run the quality benchmark. * Run the quality benchmark.
* @param maxResults how many results to collect for each quality query.
* @param judge the judge that can tell if a certain result doc is relevant for a certain quality query. * @param judge the judge that can tell if a certain result doc is relevant for a certain quality query.
* If null, no judgements would be made. Usually null for a submission run. * If null, no judgements would be made. Usually null for a submission run.
* @param submitRep submission report is created if non null. * @param submitRep submission report is created if non null.
@ -79,10 +84,11 @@ public class QualityBenchmark {
* @return QualityStats of each quality query that was executed. * @return QualityStats of each quality query that was executed.
* @throws Exception if quality benchmark failed to run. * @throws Exception if quality benchmark failed to run.
*/ */
public QualityStats [] execute(int maxResults, Judge judge, SubmissionReport submitRep, public QualityStats [] execute(Judge judge, SubmissionReport submitRep,
PrintWriter qualityLog) throws Exception { PrintWriter qualityLog) throws Exception {
QualityStats stats[] = new QualityStats[qualityQueries.length]; int nQueries = Math.min(maxQueries, qualityQueries.length);
for (int i=0; i<qualityQueries.length; i++) { QualityStats stats[] = new QualityStats[nQueries];
for (int i=0; i<nQueries; i++) {
QualityQuery qq = qualityQueries[i]; QualityQuery qq = qualityQueries[i];
// generate query // generate query
Query q = qqParser.parse(qq); Query q = qqParser.parse(qq);
@ -98,6 +104,9 @@ public class QualityBenchmark {
submitRep.report(qq,td,docNameField,searcher); submitRep.report(qq,td,docNameField,searcher);
} }
} }
if (submitRep!=null) {
submitRep.flush();
}
return stats; return stats;
} }
@ -121,4 +130,32 @@ public class QualityBenchmark {
return stts; return stts;
} }
/**
* Returns the cap on the number of quality queries this benchmark runs.
* The field defaults to <code>Integer.MAX_VALUE</code>; a smaller value is useful for debugging.
* @return the maximum number of quality queries to run.
*/
public int getMaxQueries() {
return maxQueries;
}
/**
* Set the maximum number of quality queries to run. Useful for debugging.
* <p>
* When executing, the number of queries run is the smaller of this value
* and the number of quality queries supplied to the benchmark.
* @param maxQueries upper bound on the number of quality queries to run.
*/
public void setMaxQueries(int maxQueries) {
this.maxQueries = maxQueries;
}
/**
* Returns the per-query result limit. The field defaults to 1000.
* @return the maximum number of results to collect for each quality query.
*/
public int getMaxResults() {
return maxResults;
}
/**
* Set the maximum number of results to collect for each quality query.
* @param maxResults upper bound on the number of results collected per query.
*/
public void setMaxResults(int maxResults) {
this.maxResults = maxResults;
}
} }

View File

@ -34,6 +34,7 @@ public class QualityStats {
private double pReleventSum = 0; private double pReleventSum = 0;
private double numPoints = 0; private double numPoints = 0;
private double numGoodPoints = 0; private double numGoodPoints = 0;
private double mrr = 0;
private long searchTime; private long searchTime;
private long docNamesExtractTime; private long docNamesExtractTime;
@ -82,6 +83,9 @@ public class QualityStats {
if (isRelevant) { if (isRelevant) {
numGoodPoints+=1; numGoodPoints+=1;
recallPoints.add(new RecallPoint(n,numGoodPoints)); recallPoints.add(new RecallPoint(n,numGoodPoints));
if (recallPoints.size()==1 && n<=5) { // first point, but only within 5 top scores.
mrr = 1.0 / n;
}
} }
numPoints = n; numPoints = n;
double p = numGoodPoints / numPoints; double p = numGoodPoints / numPoints;
@ -111,7 +115,7 @@ public class QualityStats {
} }
/** /**
* Return the average precision at recall points: sum of precision at recall points / maxGoodPoints. * Return the average precision at recall points.
*/ */
public double getAvp() { public double getAvp() {
return maxGoodPoints==0 ? 0 : pReleventSum/maxGoodPoints; return maxGoodPoints==0 ? 0 : pReleventSum/maxGoodPoints;
@ -154,6 +158,8 @@ public class QualityStats {
fracFormat(nf.format(maxGoodPoints))); fracFormat(nf.format(maxGoodPoints)));
logger.println(prefix+format("Average Precision: ",M)+ logger.println(prefix+format("Average Precision: ",M)+
fracFormat(nf.format(getAvp()))); fracFormat(nf.format(getAvp())));
logger.println(prefix+format("MRR: ",M)+
fracFormat(nf.format(getMRR())));
logger.println(prefix+format("Recall: ",M)+ logger.println(prefix+format("Recall: ",M)+
fracFormat(nf.format(getRecall()))); fracFormat(nf.format(getRecall())));
for (int i=1; i<(int)numPoints && i<pAt.length; i++) { for (int i=1; i<(int)numPoints && i<pAt.length; i++) {
@ -186,6 +192,10 @@ public class QualityStats {
*/ */
public static QualityStats average(QualityStats[] stats) { public static QualityStats average(QualityStats[] stats) {
QualityStats avg = new QualityStats(0,0); QualityStats avg = new QualityStats(0,0);
if (stats.length==0) {
// weird, no stats to average!
return avg;
}
int m = 0; // queries with positive judgements int m = 0; // queries with positive judgements
// aggregate // aggregate
for (int i=0; i<stats.length; i++) { for (int i=0; i<stats.length; i++) {
@ -197,6 +207,7 @@ public class QualityStats {
avg.numPoints += stats[i].numPoints; avg.numPoints += stats[i].numPoints;
avg.pReleventSum += stats[i].getAvp(); avg.pReleventSum += stats[i].getAvp();
avg.recall += stats[i].recall; avg.recall += stats[i].recall;
avg.mrr += stats[i].getMRR();
avg.maxGoodPoints += stats[i].maxGoodPoints; avg.maxGoodPoints += stats[i].maxGoodPoints;
for (int j=1; j<avg.pAt.length; j++) { for (int j=1; j<avg.pAt.length; j++) {
avg.pAt[j] += stats[i].getPrecisionAt(j); avg.pAt[j] += stats[i].getPrecisionAt(j);
@ -210,6 +221,7 @@ public class QualityStats {
avg.numGoodPoints /= m; avg.numGoodPoints /= m;
avg.numPoints /= m; avg.numPoints /= m;
avg.recall /= m; avg.recall /= m;
avg.mrr /= m;
avg.maxGoodPoints /= m; avg.maxGoodPoints /= m;
for (int j=1; j<avg.pAt.length; j++) { for (int j=1; j<avg.pAt.length; j++) {
avg.pAt[j] /= m; avg.pAt[j] /= m;
@ -256,6 +268,22 @@ public class QualityStats {
return (RecallPoint[]) recallPoints.toArray(new RecallPoint[0]); return (RecallPoint[]) recallPoints.toArray(new RecallPoint[0]);
} }
/**
* Returns the Mean reciprocal rank over the queries or RR for a single query.
* <p>
* Reciprocal rank is defined as <code>1/r</code> where <code>r</code> is the
* rank of the first correct result, or <code>0</code> if there are no correct
* results within the top 5 results.
* <p>
* This follows the definition in
* <a href="http://www.cnlp.org/publications/02cnlptrec10.pdf">
* Question Answering - CNLP at the TREC-10 Question Answering Track</a>.
* @return the reciprocal rank recorded for a single query, or, for stats
* produced by averaging, the mean of the per-query reciprocal ranks.
*/
public double getMRR() {
return mrr;
}
/** /**
* Returns the search time in milliseconds for the measured query. * Returns the search time in milliseconds for the measured query.
*/ */

View File

@ -46,8 +46,9 @@ public class QueryDriver {
// run the benchmark // run the benchmark
QualityBenchmark qrun = new QualityBenchmark(qqs, qqParser, searcher, docNameField); QualityBenchmark qrun = new QualityBenchmark(qqs, qqParser, searcher, docNameField);
qrun.setMaxResults(maxResults);
SubmissionReport submitLog = null; SubmissionReport submitLog = null;
QualityStats stats[] = qrun.execute(maxResults, judge, submitLog, logger); QualityStats stats[] = qrun.execute(judge, submitLog, logger);
// print an avarage sum of the results // print an avarage sum of the results
QualityStats avg = QualityStats.average(stats); QualityStats avg = QualityStats.average(stats);

View File

@ -0,0 +1,87 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.benchmark.quality.trec;
import java.io.BufferedReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import org.apache.lucene.benchmark.quality.QualityQuery;
/**
 * Read topics of TREC 1MQ track.
 * <p>
 * Expects this topic format -
 * <pre>
 *   qnum:qtext
 * </pre>
 * The first ':' on a line separates the query id from the query text.
 * Blank lines and comment lines starting with '#' are ignored.
 * <p>
 * All topics will have a single name value pair.
 */
public class Trec1MQReader {

  /** Name under which the query text is stored in every topic's name-value pairs. */
  private final String name;

  /**
   * Constructor for Trec's 1MQ TopicsReader
   * @param name name of name-value pair to set for all queries.
   */
  public Trec1MQReader(String name) {
    this.name = name;
  }

  /**
   * Read quality queries from trec 1MQ format topics file.
   * <p>
   * The reader is always closed before this method returns, whether it
   * completes normally or fails.
   * @param reader where queries are read from.
   * @return the result quality queries, sorted (by ID).
   * @throws IOException if cannot read the queries, or if a non-blank,
   *         non-comment line has no ':' separator.
   */
  public QualityQuery[] readQueries(BufferedReader reader) throws IOException {
    ArrayList res = new ArrayList();
    String line;
    try {
      while (null != (line = reader.readLine())) {
        line = line.trim();
        // blank lines carry no topic; comment lines start with '#'
        if (line.length() == 0 || line.startsWith("#")) {
          continue;
        }
        // the first ':' splits "qnum:qtext"
        int k = line.indexOf(':');
        if (k < 0) {
          // without this check substring(0,-1) would fail with an
          // unhelpful StringIndexOutOfBoundsException
          throw new IOException("Malformed 1MQ topic line, expected 'qnum:qtext' but got: " + line);
        }
        String id = line.substring(0, k).trim();
        String qtext = line.substring(k + 1).trim();
        // we got a topic!
        HashMap fields = new HashMap();
        fields.put(name, qtext);
        res.add(new QualityQuery(id, fields));
      }
    } finally {
      reader.close();
    }
    // sort result array (by ID)
    QualityQuery qq[] = (QualityQuery[]) res.toArray(new QualityQuery[0]);
    Arrays.sort(qq);
    return qq;
  }
}

View File

@ -35,13 +35,16 @@ public class SubmissionReport {
private NumberFormat nf; private NumberFormat nf;
private PrintWriter logger; private PrintWriter logger;
private String name;
/** /**
* Constructor for SubmissionReport. * Constructor for SubmissionReport.
* @param logger if null, no submission data is created. * @param logger if null, no submission data is created.
* @param name name of this run.
*/ */
public SubmissionReport (PrintWriter logger) { public SubmissionReport (PrintWriter logger, String name) {
this.logger = logger; this.logger = logger;
this.name = name;
nf = NumberFormat.getInstance(); nf = NumberFormat.getInstance();
nf.setMaximumFractionDigits(4); nf.setMaximumFractionDigits(4);
nf.setMinimumFractionDigits(4); nf.setMinimumFractionDigits(4);
@ -66,14 +69,21 @@ public class SubmissionReport {
String docName = xt.docName(searcher,sd[i].doc); String docName = xt.docName(searcher,sd[i].doc);
logger.println( logger.println(
qq.getQueryID() + sep + qq.getQueryID() + sep +
'0' + sep + "Q0" + sep +
format(docName,20) + sep + format(docName,20) + sep +
format(""+i,7) + sep + format(""+i,7) + sep +
nf.format(sd[i].score) nf.format(sd[i].score) + sep +
name
); );
} }
} }
/**
* Flush the underlying logger so that buffered submission data is written out.
* Does nothing when this report was created with a null logger.
*/
public void flush() {
if (logger!=null) {
logger.flush();
}
}
private static String padd = " "; private static String padd = " ";
private String format(String s, int minLen) { private String format(String s, int minLen) {
s = (s==null ? "" : s); s = (s==null ? "" : s);

View File

@ -87,8 +87,9 @@ public class TestQualityRun extends TestCase {
QualityQueryParser qqParser = new SimpleQQParser("title","body"); QualityQueryParser qqParser = new SimpleQQParser("title","body");
QualityBenchmark qrun = new QualityBenchmark(qqs, qqParser, searcher, docNameField); QualityBenchmark qrun = new QualityBenchmark(qqs, qqParser, searcher, docNameField);
SubmissionReport submitLog = DEBUG ? new SubmissionReport(logger) : null; SubmissionReport submitLog = DEBUG ? new SubmissionReport(logger, "TestRun") : null;
QualityStats stats[] = qrun.execute(maxResults, judge, submitLog, logger); qrun.setMaxResults(maxResults);
QualityStats stats[] = qrun.execute(judge, submitLog, logger);
// --------- verify by the way judgments were altered for this test: // --------- verify by the way judgments were altered for this test:
// for some queries, depending on m = qnum % 8 // for some queries, depending on m = qnum % 8