LUCENE-1116: contrib/benchmark quality package improvements (MRR, Trec1MQ)

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@608370 13f79535-47bb-0310-9956-ffa450edef68
2008-01-03 07:44:40 +00:00 · 2008-01-03 07:44:40 +00:00 · 9e65cd554f
parent 40eb1cd53f
commit 9e65cd554f
7 changed files with 182 additions and 11 deletions
--- a/contrib/benchmark/CHANGES.txt
+++ b/contrib/benchmark/CHANGES.txt
@ -4,6 +4,13 @@ The Benchmark contrib package contains code for benchmarking Lucene in a variety
 $Id:$
 01/03/08
  LUCENE-1116: quality package improvements:
  - add MRR computation; 
  - allow control of max #queries to run;
  - verify log & report are flushed.
  - add TREC query reader for the 1MQ track.  
 12/31/07
  LUCENE-1102: EnwikiDocMaker now indexes the docid field, so results might not be comparable with results prior to this change, although
  it is doubted that this one small field makes much difference.
--- a/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityBenchmark.java
+++ b/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityBenchmark.java
@ -52,6 +52,12 @@ public class QualityBenchmark {
  /** index field to extract doc name for each search result; used for judging the results. */  
  protected String docNameField;
  /** maximal number of queries that this quality benchmark runs. Default: maxint. Useful for debugging. */
  private int maxQueries = Integer.MAX_VALUE;
  /** maximal number of results to collect for each query. Default: 1000. */
  private int maxResults = 1000;
  /**
   * Create a QualityBenchmark.
   * @param qqs quality queries to run.
@ -71,7 +77,6 @@ public class QualityBenchmark {
  /**
   * Run the quality benchmark.
   * @param maxResults how many results to collect for each quality query.
   * @param judge the judge that can tell if a certain result doc is relevant for a certain quality query. 
   *        If null, no judgements would be made. Usually null for a submission run. 
   * @param submitRep submission report is created if non null.
@ -79,10 +84,11 @@ public class QualityBenchmark {
   * @return QualityStats of each quality query that was executed.
   * @throws Exception if quality benchmark failed to run.
   */
-  public  QualityStats [] execute(int maxResults, Judge judge, SubmissionReport submitRep, 
+  public  QualityStats [] execute(Judge judge, SubmissionReport submitRep, 
                                  PrintWriter qualityLog) throws Exception {
-    QualityStats stats[] = new QualityStats[qualityQueries.length]; 
+    int nQueries = Math.min(maxQueries, qualityQueries.length);
-    for (int i=0; i<qualityQueries.length; i++) {
+    QualityStats stats[] = new QualityStats[nQueries]; 
    for (int i=0; i<nQueries; i++) {
      QualityQuery qq = qualityQueries[i];
      // generate query
      Query q = qqParser.parse(qq);
@ -98,6 +104,9 @@ public class QualityBenchmark {
        submitRep.report(qq,td,docNameField,searcher);
      }
    } 
    if (submitRep!=null) {
      submitRep.flush();
    }
    return stats;
  }
@ -121,4 +130,32 @@ public class QualityBenchmark {
    return stts;
  }
  /**
   * Get the upper bound on the number of quality queries that are run.
   * Mainly useful for debugging.
   * @return the maximum number of quality queries to run.
   */
  public int getMaxQueries() {
    return this.maxQueries;
  }
  /**
   * Set the maximum number of quality queries to run. Useful for debugging.
   * @param maxQueries upper bound on the number of queries to run; must not be negative.
   * @throws IllegalArgumentException if <code>maxQueries</code> is negative.
   */
  public void setMaxQueries(int maxQueries) {
    if (maxQueries < 0) {
      // Fail fast: a negative bound would otherwise only surface when the
      // benchmark is executed, as a negative size for the stats array.
      throw new IllegalArgumentException("maxQueries must not be negative: " + maxQueries);
    }
    this.maxQueries = maxQueries;
  }
  /**
   * Get the per-query limit on collected results.
   * @return the maximum number of results to collect for each quality query.
   */
  public int getMaxResults() {
    return this.maxResults;
  }
  /**
   * Set the per-query limit on collected results.
   * @param maxResults the maximum number of results to collect for each quality query.
   */
  public void setMaxResults(int maxResults) {
    this.maxResults = maxResults;
  }
 }
--- a/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityStats.java
+++ b/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityStats.java
@ -34,6 +34,7 @@ public class QualityStats {
  private double pReleventSum = 0;
  private double numPoints = 0;
  private double numGoodPoints = 0;
  private double mrr = 0;
  private long searchTime;
  private long docNamesExtractTime;
@ -82,6 +83,9 @@ public class QualityStats {
    if (isRelevant) {
      numGoodPoints+=1;
      recallPoints.add(new RecallPoint(n,numGoodPoints));
      if (recallPoints.size()==1 && n<=5) { // first point, but only within 5 top scores. 
        mrr =  1.0 / n;
      }
    }
    numPoints = n;
    double p = numGoodPoints / numPoints;
@ -111,7 +115,7 @@ public class QualityStats {
  }
  /**
-   * Return the average precision at recall points: sum of precision at recall points / maxGoodPoints.
+   * Return the average precision at recall points.
   */
  public double getAvp() {
    return maxGoodPoints==0 ? 0 : pReleventSum/maxGoodPoints;
@ -154,6 +158,8 @@ public class QualityStats {
        fracFormat(nf.format(maxGoodPoints)));
    logger.println(prefix+format("Average Precision: ",M)+
        fracFormat(nf.format(getAvp())));
    logger.println(prefix+format("MRR: ",M)+
        fracFormat(nf.format(getMRR())));
    logger.println(prefix+format("Recall: ",M)+
        fracFormat(nf.format(getRecall())));
    for (int i=1; i<(int)numPoints && i<pAt.length; i++) {
@ -186,6 +192,10 @@ public class QualityStats {
   */
  public static QualityStats average(QualityStats[] stats) {
    QualityStats avg = new QualityStats(0,0);
    if (stats.length==0) {
      // weird, no stats to average!
      return avg;
    }
    int m = 0; // queries with positive judgements
    // aggregate
    for (int i=0; i<stats.length; i++) {
@ -197,6 +207,7 @@ public class QualityStats {
        avg.numPoints += stats[i].numPoints;
        avg.pReleventSum += stats[i].getAvp();
        avg.recall += stats[i].recall;
        avg.mrr += stats[i].getMRR();
        avg.maxGoodPoints += stats[i].maxGoodPoints;
        for (int j=1; j<avg.pAt.length; j++) {
          avg.pAt[j] += stats[i].getPrecisionAt(j);
@ -210,6 +221,7 @@ public class QualityStats {
    avg.numGoodPoints /= m;
    avg.numPoints /= m;
    avg.recall /= m;
    avg.mrr /= m;
    avg.maxGoodPoints /= m;
    for (int j=1; j<avg.pAt.length; j++) {
      avg.pAt[j] /= m;
@ -256,6 +268,22 @@ public class QualityStats {
    return (RecallPoint[]) recallPoints.toArray(new RecallPoint[0]);
  }
  /**
   * Returns the reciprocal rank (RR) for a single query, or the mean
   * reciprocal rank (MRR) when this object holds averaged statistics.
   * <p>
   * The reciprocal rank is <code>1/r</code>, where <code>r</code> is the rank
   * of the first correct result; it is <code>0</code> when no correct result
   * appears within the top 5 results.
   * <p>
   * This follows the definition in 
   * <a href="http://www.cnlp.org/publications/02cnlptrec10.pdf"> 
   * Question Answering - CNLP at the TREC-10 Question Answering Track</a>.
   * @return the (mean) reciprocal rank.
   */
  public double getMRR() {
    return this.mrr;
  }
  /**
   * Returns the search time in milliseconds for the measured query.
   */
--- a/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/QueryDriver.java
+++ b/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/QueryDriver.java
@ -46,8 +46,9 @@ public class QueryDriver {
    // run the benchmark
    QualityBenchmark qrun = new QualityBenchmark(qqs, qqParser, searcher, docNameField);
    qrun.setMaxResults(maxResults);
    SubmissionReport submitLog = null;
-    QualityStats stats[] = qrun.execute(maxResults, judge, submitLog, logger);
+    QualityStats stats[] = qrun.execute(judge, submitLog, logger);
    // print an average sum of the results
    QualityStats avg = QualityStats.average(stats);
--- a/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/Trec1MQReader.java
+++ b/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/Trec1MQReader.java
@ -0,0 +1,87 @@
 /**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package org.apache.lucene.benchmark.quality.trec;
 import java.io.BufferedReader;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
 import org.apache.lucene.benchmark.quality.QualityQuery;
 /**
 * Read topics of TREC 1MQ track.
 * <p>
 * Expects this topic format -
 * <pre>
 *   qnum:qtext
 * </pre>
 * Comment lines starting with '#' and blank lines are ignored.
 * <p>
 * All topics will have a single name value pair.
 */
 public class Trec1MQReader {
  /** Name under which the query text of every topic is stored. */
  private final String name;
  /**
   *  Constructor for Trec's 1MQ TopicsReader
   *  @param name name of name-value pair to set for all queries.
   */
  public Trec1MQReader(String name) {
    this.name = name;
  }
  /**
   * Read quality queries from trec 1MQ format topics file.
   * <p>
   * Each non-comment, non-blank line is split at its first ':' into the
   * query id (before) and the query text (after); both parts are trimmed.
   * The reader is always closed before this method returns or throws.
   * @param reader where queries are read from.
   * @return the result quality queries, sorted by their natural order.
   * @throws IOException if cannot read the queries, or if a topic line
   *         does not contain the ':' separator.
   */
  public QualityQuery[] readQueries(BufferedReader reader) throws IOException {
    ArrayList res = new ArrayList();
    String line;
    try {
      while (null!=(line=reader.readLine())) {
        line = line.trim();
        // skip blank lines and comments
        if (line.length()==0 || line.startsWith("#")) {
          continue;
        }
        int k = line.indexOf(":");
        if (k<0) {
          // without a separator there is no id to extract; report the bad
          // line rather than failing with an index error in substring()
          throw new IOException("Malformed 1MQ topic line, expected 'qnum:qtext': "+line);
        }
        // id
        String id = line.substring(0,k).trim();
        // qtext
        String qtext = line.substring(k+1).trim();
        // we got a topic!
        HashMap fields = new HashMap();
        fields.put(name,qtext);
        QualityQuery topic = new QualityQuery(id,fields);
        res.add(topic);
      }
    } finally {
      reader.close();
    }
    // sort result array (by ID) 
    QualityQuery qq[] = (QualityQuery[]) res.toArray(new QualityQuery[0]);
    Arrays.sort(qq);
    return qq;
  }
 }
--- a/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SubmissionReport.java
+++ b/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SubmissionReport.java
@ -35,13 +35,16 @@ public class SubmissionReport {
  private NumberFormat nf;
  private PrintWriter logger;
  private String name;
  /**
   * Constructor for SubmissionReport.
   * @param logger if null, no submission data is created. 
   * @param name name of this run.
   */
-  public SubmissionReport (PrintWriter logger) {
+  public SubmissionReport (PrintWriter logger, String name) {
    this.logger = logger;
    this.name = name;
    nf = NumberFormat.getInstance();
    nf.setMaximumFractionDigits(4);
    nf.setMinimumFractionDigits(4);
@ -66,14 +69,21 @@ public class SubmissionReport {
      String docName = xt.docName(searcher,sd[i].doc);
      logger.println(
          qq.getQueryID()       + sep +
-          '0'                   + sep +
+          "Q0"                   + sep +
          format(docName,20)    + sep +
          format(""+i,7)        + sep +
-          nf.format(sd[i].score)
+          nf.format(sd[i].score) + sep +
          name
          );
    }
  }
  /**
   * Flush the underlying logger, if there is one.
   * Does nothing when this report was created with a null logger.
   */
  public void flush() {
    if (logger==null) {
      return; // nothing to flush
    }
    logger.flush();
  }
  private static String padd = "                                    ";
  private String format(String s, int minLen) {
    s = (s==null ? "" : s);
--- a/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/TestQualityRun.java
+++ b/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/TestQualityRun.java
@ -87,8 +87,9 @@ public class TestQualityRun extends TestCase {
    QualityQueryParser qqParser = new SimpleQQParser("title","body");
    QualityBenchmark qrun = new QualityBenchmark(qqs, qqParser, searcher, docNameField);
-    SubmissionReport submitLog = DEBUG ? new SubmissionReport(logger) : null;
+    SubmissionReport submitLog = DEBUG ? new SubmissionReport(logger, "TestRun") : null;
-    QualityStats stats[] = qrun.execute(maxResults, judge, submitLog, logger);
+    qrun.setMaxResults(maxResults);
    QualityStats stats[] = qrun.execute(judge, submitLog, logger);
    // --------- verify by the way judgments were altered for this test:
    // for some queries, depending on m = qnum % 8