mirror of https://github.com/apache/lucene.git
LUCENE-1116: contrib/benchmark quality package improvements (MRR, Trec1MQ)
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@608370 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
40eb1cd53f
commit
9e65cd554f
|
@ -4,6 +4,13 @@ The Benchmark contrib package contains code for benchmarking Lucene in a variety
|
|||
|
||||
$Id:$
|
||||
|
||||
01/03/08
|
||||
LUCENE-1116: quality package improvements:
|
||||
- add MRR computation;
|
||||
- allow control of max #queries to run;
|
||||
- verify log & report are flushed.
|
||||
- add TREC query reader for the 1MQ track.
|
||||
|
||||
12/31/07
|
||||
LUCENE-1102: EnwikiDocMaker now indexes the docid field, so results might not be comparable with results prior to this change, although
|
||||
it is doubted that this one small field makes much difference.
|
||||
|
|
|
@ -51,6 +51,12 @@ public class QualityBenchmark {
|
|||
|
||||
/** index field to extract doc name for each search result; used for judging the results. */
|
||||
protected String docNameField;
|
||||
|
||||
/** maximal number of queries that this quality benchmark runs. Default: maxint. Useful for debugging. */
|
||||
private int maxQueries = Integer.MAX_VALUE;
|
||||
|
||||
/** maximal number of results to collect for each query. Default: 1000. */
|
||||
private int maxResults = 1000;
|
||||
|
||||
/**
|
||||
* Create a QualityBenchmark.
|
||||
|
@ -71,7 +77,6 @@ public class QualityBenchmark {
|
|||
|
||||
/**
|
||||
* Run the quality benchmark.
|
||||
* @param maxResults how many results to collect for each quality query.
|
||||
* @param judge the judge that can tell if a certain result doc is relevant for a certain quality query.
|
||||
* If null, no judgements would be made. Usually null for a submission run.
|
||||
* @param submitRep submission report is created if non null.
|
||||
|
@ -79,10 +84,11 @@ public class QualityBenchmark {
|
|||
* @return QualityStats of each quality query that was executed.
|
||||
* @throws Exception if quality benchmark failed to run.
|
||||
*/
|
||||
public QualityStats [] execute(int maxResults, Judge judge, SubmissionReport submitRep,
|
||||
public QualityStats [] execute(Judge judge, SubmissionReport submitRep,
|
||||
PrintWriter qualityLog) throws Exception {
|
||||
QualityStats stats[] = new QualityStats[qualityQueries.length];
|
||||
for (int i=0; i<qualityQueries.length; i++) {
|
||||
int nQueries = Math.min(maxQueries, qualityQueries.length);
|
||||
QualityStats stats[] = new QualityStats[nQueries];
|
||||
for (int i=0; i<nQueries; i++) {
|
||||
QualityQuery qq = qualityQueries[i];
|
||||
// generate query
|
||||
Query q = qqParser.parse(qq);
|
||||
|
@ -98,6 +104,9 @@ public class QualityBenchmark {
|
|||
submitRep.report(qq,td,docNameField,searcher);
|
||||
}
|
||||
}
|
||||
if (submitRep!=null) {
|
||||
submitRep.flush();
|
||||
}
|
||||
return stats;
|
||||
}
|
||||
|
||||
|
@ -121,4 +130,32 @@ public class QualityBenchmark {
|
|||
return stts;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the maximum number of quality queries to run. Useful at debugging.
|
||||
*/
|
||||
public int getMaxQueries() {
|
||||
return maxQueries;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the maximum number of quality queries to run. Useful at debugging.
|
||||
*/
|
||||
public void setMaxQueries(int maxQueries) {
|
||||
this.maxQueries = maxQueries;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the maximum number of results to collect for each quality query.
|
||||
*/
|
||||
public int getMaxResults() {
|
||||
return maxResults;
|
||||
}
|
||||
|
||||
/**
|
||||
* set the maximum number of results to collect for each quality query.
|
||||
*/
|
||||
public void setMaxResults(int maxResults) {
|
||||
this.maxResults = maxResults;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -34,6 +34,7 @@ public class QualityStats {
|
|||
private double pReleventSum = 0;
|
||||
private double numPoints = 0;
|
||||
private double numGoodPoints = 0;
|
||||
private double mrr = 0;
|
||||
private long searchTime;
|
||||
private long docNamesExtractTime;
|
||||
|
||||
|
@ -82,6 +83,9 @@ public class QualityStats {
|
|||
if (isRelevant) {
|
||||
numGoodPoints+=1;
|
||||
recallPoints.add(new RecallPoint(n,numGoodPoints));
|
||||
if (recallPoints.size()==1 && n<=5) { // first point, but only within 5 top scores.
|
||||
mrr = 1.0 / n;
|
||||
}
|
||||
}
|
||||
numPoints = n;
|
||||
double p = numGoodPoints / numPoints;
|
||||
|
@ -111,7 +115,7 @@ public class QualityStats {
|
|||
}
|
||||
|
||||
/**
|
||||
* Return the average precision at recall points: sum of precision at recall points / maxGoodPoints.
|
||||
* Return the average precision at recall points.
|
||||
*/
|
||||
public double getAvp() {
|
||||
return maxGoodPoints==0 ? 0 : pReleventSum/maxGoodPoints;
|
||||
|
@ -154,6 +158,8 @@ public class QualityStats {
|
|||
fracFormat(nf.format(maxGoodPoints)));
|
||||
logger.println(prefix+format("Average Precision: ",M)+
|
||||
fracFormat(nf.format(getAvp())));
|
||||
logger.println(prefix+format("MRR: ",M)+
|
||||
fracFormat(nf.format(getMRR())));
|
||||
logger.println(prefix+format("Recall: ",M)+
|
||||
fracFormat(nf.format(getRecall())));
|
||||
for (int i=1; i<(int)numPoints && i<pAt.length; i++) {
|
||||
|
@ -186,6 +192,10 @@ public class QualityStats {
|
|||
*/
|
||||
public static QualityStats average(QualityStats[] stats) {
|
||||
QualityStats avg = new QualityStats(0,0);
|
||||
if (stats.length==0) {
|
||||
// weird, no stats to average!
|
||||
return avg;
|
||||
}
|
||||
int m = 0; // queries with positive judgements
|
||||
// aggregate
|
||||
for (int i=0; i<stats.length; i++) {
|
||||
|
@ -197,6 +207,7 @@ public class QualityStats {
|
|||
avg.numPoints += stats[i].numPoints;
|
||||
avg.pReleventSum += stats[i].getAvp();
|
||||
avg.recall += stats[i].recall;
|
||||
avg.mrr += stats[i].getMRR();
|
||||
avg.maxGoodPoints += stats[i].maxGoodPoints;
|
||||
for (int j=1; j<avg.pAt.length; j++) {
|
||||
avg.pAt[j] += stats[i].getPrecisionAt(j);
|
||||
|
@ -210,6 +221,7 @@ public class QualityStats {
|
|||
avg.numGoodPoints /= m;
|
||||
avg.numPoints /= m;
|
||||
avg.recall /= m;
|
||||
avg.mrr /= m;
|
||||
avg.maxGoodPoints /= m;
|
||||
for (int j=1; j<avg.pAt.length; j++) {
|
||||
avg.pAt[j] /= m;
|
||||
|
@ -256,6 +268,22 @@ public class QualityStats {
|
|||
return (RecallPoint[]) recallPoints.toArray(new RecallPoint[0]);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the Mean reciprocal rank over the queries or RR for a single query.
|
||||
* <p>
|
||||
* Reciprocal rank is defined as <code>1/r</code> where <code>r</code> is the
|
||||
* rank of the first correct result, or <code>0</code> if there are no correct
|
||||
* results within the top 5 results.
|
||||
* <p>
|
||||
* This follows the definition in
|
||||
* <a href="http://www.cnlp.org/publications/02cnlptrec10.pdf">
|
||||
* Question Answering - CNLP at the TREC-10 Question Answering Track</a>.
|
||||
*/
|
||||
public double getMRR() {
|
||||
return mrr;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the search time in milliseconds for the measured query.
|
||||
*/
|
||||
|
|
|
@ -46,8 +46,9 @@ public class QueryDriver {
|
|||
|
||||
// run the benchmark
|
||||
QualityBenchmark qrun = new QualityBenchmark(qqs, qqParser, searcher, docNameField);
|
||||
qrun.setMaxResults(maxResults);
|
||||
SubmissionReport submitLog = null;
|
||||
QualityStats stats[] = qrun.execute(maxResults, judge, submitLog, logger);
|
||||
QualityStats stats[] = qrun.execute(judge, submitLog, logger);
|
||||
|
||||
// print an average sum of the results
|
||||
QualityStats avg = QualityStats.average(stats);
|
||||
|
|
|
@ -0,0 +1,87 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.benchmark.quality.trec;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
|
||||
import org.apache.lucene.benchmark.quality.QualityQuery;
|
||||
|
||||
/**
|
||||
* Read topics of TREC 1MQ track.
|
||||
* <p>
|
||||
* Expects this topic format -
|
||||
* <pre>
|
||||
* qnum:qtext
|
||||
* </pre>
|
||||
* Comment lines starting with '#' are ignored.
|
||||
* <p>
|
||||
* All topics will have a single name value pair.
|
||||
*/
|
||||
public class Trec1MQReader {
|
||||
|
||||
private String name;
|
||||
|
||||
/**
|
||||
* Constructor for Trec's 1MQ TopicsReader
|
||||
* @param name name of name-value pair to set for all queries.
|
||||
*/
|
||||
public Trec1MQReader(String name) {
|
||||
super();
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
/**
|
||||
* Read quality queries from trec 1MQ format topics file.
|
||||
* @param reader where queries are read from.
|
||||
* @return the result quality queries.
|
||||
* @throws IOException if cannot read the queries.
|
||||
*/
|
||||
public QualityQuery[] readQueries(BufferedReader reader) throws IOException {
|
||||
ArrayList res = new ArrayList();
|
||||
String line;
|
||||
try {
|
||||
while (null!=(line=reader.readLine())) {
|
||||
line = line.trim();
|
||||
if (line.startsWith("#")) {
|
||||
continue;
|
||||
}
|
||||
// id
|
||||
int k = line.indexOf(":");
|
||||
String id = line.substring(0,k).trim();
|
||||
// qtext
|
||||
String qtext = line.substring(k+1).trim();
|
||||
// we got a topic!
|
||||
HashMap fields = new HashMap();
|
||||
fields.put(name,qtext);
|
||||
//System.out.println("id: "+id+" qtext: "+qtext+" line: "+line);
|
||||
QualityQuery topic = new QualityQuery(id,fields);
|
||||
res.add(topic);
|
||||
}
|
||||
} finally {
|
||||
reader.close();
|
||||
}
|
||||
// sort result array (by ID)
|
||||
QualityQuery qq[] = (QualityQuery[]) res.toArray(new QualityQuery[0]);
|
||||
Arrays.sort(qq);
|
||||
return qq;
|
||||
}
|
||||
|
||||
}
|
|
@ -35,13 +35,16 @@ public class SubmissionReport {
|
|||
|
||||
private NumberFormat nf;
|
||||
private PrintWriter logger;
|
||||
private String name;
|
||||
|
||||
/**
|
||||
* Constructor for SubmissionReport.
|
||||
* @param logger if null, no submission data is created.
|
||||
* @param name name of this run.
|
||||
*/
|
||||
public SubmissionReport (PrintWriter logger) {
|
||||
public SubmissionReport (PrintWriter logger, String name) {
|
||||
this.logger = logger;
|
||||
this.name = name;
|
||||
nf = NumberFormat.getInstance();
|
||||
nf.setMaximumFractionDigits(4);
|
||||
nf.setMinimumFractionDigits(4);
|
||||
|
@ -66,14 +69,21 @@ public class SubmissionReport {
|
|||
String docName = xt.docName(searcher,sd[i].doc);
|
||||
logger.println(
|
||||
qq.getQueryID() + sep +
|
||||
'0' + sep +
|
||||
"Q0" + sep +
|
||||
format(docName,20) + sep +
|
||||
format(""+i,7) + sep +
|
||||
nf.format(sd[i].score)
|
||||
nf.format(sd[i].score) + sep +
|
||||
name
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
public void flush() {
|
||||
if (logger!=null) {
|
||||
logger.flush();
|
||||
}
|
||||
}
|
||||
|
||||
private static String padd = " ";
|
||||
private String format(String s, int minLen) {
|
||||
s = (s==null ? "" : s);
|
||||
|
|
|
@ -87,8 +87,9 @@ public class TestQualityRun extends TestCase {
|
|||
QualityQueryParser qqParser = new SimpleQQParser("title","body");
|
||||
QualityBenchmark qrun = new QualityBenchmark(qqs, qqParser, searcher, docNameField);
|
||||
|
||||
SubmissionReport submitLog = DEBUG ? new SubmissionReport(logger) : null;
|
||||
QualityStats stats[] = qrun.execute(maxResults, judge, submitLog, logger);
|
||||
SubmissionReport submitLog = DEBUG ? new SubmissionReport(logger, "TestRun") : null;
|
||||
qrun.setMaxResults(maxResults);
|
||||
QualityStats stats[] = qrun.execute(judge, submitLog, logger);
|
||||
|
||||
// --------- verify by the way judgments were altered for this test:
|
||||
// for some queries, depending on m = qnum % 8
|
||||
|
|
Loading…
Reference in New Issue