LUCENE-1116: contrib/benchmark quality package improvements (MRR, Trec1MQ)

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@608370 13f79535-47bb-0310-9956-ffa450edef68
2025-02-21 17:46:28 +00:00 · 2008-01-03 07:44:40 +00:00 · 2008-01-03 07:44:40 +00:00 · 9e65cd554f
commit 9e65cd554f
parent 40eb1cd53f
7 changed files with 182 additions and 11 deletions
--- a/contrib/benchmark/CHANGES.txt
+++ b/contrib/benchmark/CHANGES.txt
@ -4,6 +4,13 @@ The Benchmark contrib package contains code for benchmarking Lucene in a variety

 $Id:$

+01/03/08
+  LUCENE-1116: quality package improvements:
+  - add MRR computation; 
+  - allow control of max #queries to run;
+  - verify log & report are flushed.
+  - add TREC query reader for the 1MQ track.  
+      
 12/31/07
  LUCENE-1102: EnwikiDocMaker now indexes the docid field, so results might not be comparable with results prior to this change, although
  it is doubted that this one small field makes much difference.
--- a/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityBenchmark.java
+++ b/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityBenchmark.java
@ -51,6 +51,12 @@ public class QualityBenchmark {

  /** index field to extract doc name for each search result; used for judging the results. */  
  protected String docNameField;
+  
+  /** maximal number of queries that this quality benchmark runs. Default: maxint. Useful for debugging. */
+  private int maxQueries = Integer.MAX_VALUE;
+  
+  /** maximal number of results to collect for each query. Default: 1000. */
+  private int maxResults = 1000;

  /**
   * Create a QualityBenchmark.
@ -71,7 +77,6 @@ public class QualityBenchmark {

  /**
   * Run the quality benchmark.
-   * @param maxResults how many results to collect for each quality query.
   * @param judge the judge that can tell if a certain result doc is relevant for a certain quality query. 
   *        If null, no judgements would be made. Usually null for a submission run. 
   * @param submitRep submission report is created if non null.
@ -79,10 +84,11 @@ public class QualityBenchmark {
   * @return QualityStats of each quality query that was executed.
   * @throws Exception if quality benchmark failed to run.
   */
-  public  QualityStats [] execute(int maxResults, Judge judge, SubmissionReport submitRep, 
+  public  QualityStats [] execute(Judge judge, SubmissionReport submitRep, 
                                  PrintWriter qualityLog) throws Exception {
-    QualityStats stats[] = new QualityStats[qualityQueries.length]; 
-    for (int i=0; i<qualityQueries.length; i++) {
+    int nQueries = Math.min(maxQueries, qualityQueries.length);
+    QualityStats stats[] = new QualityStats[nQueries]; 
+    for (int i=0; i<nQueries; i++) {
      QualityQuery qq = qualityQueries[i];
      // generate query
      Query q = qqParser.parse(qq);
@ -98,6 +104,9 @@ public class QualityBenchmark {
        submitRep.report(qq,td,docNameField,searcher);
      }
    } 
+    if (submitRep!=null) {
+      submitRep.flush();
+    }
    return stats;
  }
  
@ -121,4 +130,32 @@ public class QualityBenchmark {
    return stts;
  }

+  /**
+   * @return the maximum number of quality queries to run. Useful at debugging.
+   */
+  public int getMaxQueries() {
+    return maxQueries;
+  }
+
+  /**
+   * Set the maximum number of quality queries to run. Useful at debugging.
+   */
+  public void setMaxQueries(int maxQueries) {
+    this.maxQueries = maxQueries;
+  }
+
+  /**
+   * @return the maximum number of results to collect for each quality query.
+   */
+  public int getMaxResults() {
+    return maxResults;
+  }
+
+  /**
+   * set the maximum number of results to collect for each quality query.
+   */
+  public void setMaxResults(int maxResults) {
+    this.maxResults = maxResults;
+  }
+
 }
--- a/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityStats.java
+++ b/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityStats.java
@ -34,6 +34,7 @@ public class QualityStats {
  private double pReleventSum = 0;
  private double numPoints = 0;
  private double numGoodPoints = 0;
+  private double mrr = 0;
  private long searchTime;
  private long docNamesExtractTime;

@ -82,6 +83,9 @@ public class QualityStats {
    if (isRelevant) {
      numGoodPoints+=1;
      recallPoints.add(new RecallPoint(n,numGoodPoints));
+      if (recallPoints.size()==1 && n<=5) { // first point, but only within 5 top scores. 
+        mrr =  1.0 / n;
+      }
    }
    numPoints = n;
    double p = numGoodPoints / numPoints;
@ -111,7 +115,7 @@ public class QualityStats {
  }

  /**
-   * Return the average precision at recall points: sum of precision at recall points / maxGoodPoints.
+   * Return the average precision at recall points.
   */
  public double getAvp() {
    return maxGoodPoints==0 ? 0 : pReleventSum/maxGoodPoints;
@ -154,6 +158,8 @@ public class QualityStats {
        fracFormat(nf.format(maxGoodPoints)));
    logger.println(prefix+format("Average Precision: ",M)+
        fracFormat(nf.format(getAvp())));
+    logger.println(prefix+format("MRR: ",M)+
+        fracFormat(nf.format(getMRR())));
    logger.println(prefix+format("Recall: ",M)+
        fracFormat(nf.format(getRecall())));
    for (int i=1; i<(int)numPoints && i<pAt.length; i++) {
@ -186,6 +192,10 @@ public class QualityStats {
   */
  public static QualityStats average(QualityStats[] stats) {
    QualityStats avg = new QualityStats(0,0);
+    if (stats.length==0) {
+      // weired, no stats to average!
+      return avg;
+    }
    int m = 0; // queries with positive judgements
    // aggregate
    for (int i=0; i<stats.length; i++) {
@ -197,6 +207,7 @@ public class QualityStats {
        avg.numPoints += stats[i].numPoints;
        avg.pReleventSum += stats[i].getAvp();
        avg.recall += stats[i].recall;
+        avg.mrr += stats[i].getMRR();
        avg.maxGoodPoints += stats[i].maxGoodPoints;
        for (int j=1; j<avg.pAt.length; j++) {
          avg.pAt[j] += stats[i].getPrecisionAt(j);
@ -210,6 +221,7 @@ public class QualityStats {
    avg.numGoodPoints /= m;
    avg.numPoints /= m;
    avg.recall /= m;
+    avg.mrr /= m;
    avg.maxGoodPoints /= m;
    for (int j=1; j<avg.pAt.length; j++) {
      avg.pAt[j] /= m;
@ -256,6 +268,22 @@ public class QualityStats {
    return (RecallPoint[]) recallPoints.toArray(new RecallPoint[0]);
  }

+  /**
+   * Returns the Mean reciprocal rank over the queries or RR for a single query.
+   * <p>
+   * Reciprocal rank is defined as <code>1/r</code> where <code>r</code> is the 
+   * rank of the first correct result, or <code>0</code> if there are no correct 
+   * results within the top 5 results. 
+   * <p>
+   * This follows the definition in 
+   * <a href="http://www.cnlp.org/publications/02cnlptrec10.pdf"> 
+   * Question Answering - CNLP at the TREC-10 Question Answering Track</a>.
+   */
+  public double getMRR() {
+    return mrr;
+  }
+
+  
  /**
   * Returns the search time in milliseconds for the measured query.
   */
--- a/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/QueryDriver.java
+++ b/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/QueryDriver.java
@ -46,8 +46,9 @@ public class QueryDriver {

    // run the benchmark
    QualityBenchmark qrun = new QualityBenchmark(qqs, qqParser, searcher, docNameField);
+    qrun.setMaxResults(maxResults);
    SubmissionReport submitLog = null;
-    QualityStats stats[] = qrun.execute(maxResults, judge, submitLog, logger);
+    QualityStats stats[] = qrun.execute(judge, submitLog, logger);

    // print an avarage sum of the results
    QualityStats avg = QualityStats.average(stats);
--- a/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/Trec1MQReader.java
+++ b/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/Trec1MQReader.java
@ -0,0 +1,87 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.benchmark.quality.trec;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+
+import org.apache.lucene.benchmark.quality.QualityQuery;
+
+/**
+ * Read topics of TREC 1MQ track.
+ * <p>
+ * Expects this topic format -
+ * <pre>
+ *   qnum:qtext
+ * </pre>
+ * Comment lines starting with '#' are ignored.
+ * <p>
+ * All topics will have a single name value pair.
+ */
+public class Trec1MQReader {
+
+  private String name;
+  
+  /**
+   *  Constructor for Trec's 1MQ TopicsReader
+   *  @param name name of name-value pair to set for all queries.
+   */
+  public Trec1MQReader(String name) {
+    super();
+    this.name = name;
+  }
+
+  /**
+   * Read quality queries from trec 1MQ format topics file.
+   * @param reader where queries are read from.
+   * @return the result quality queries.
+   * @throws IOException if cannot read the queries.
+   */
+  public QualityQuery[] readQueries(BufferedReader reader) throws IOException {
+    ArrayList res = new ArrayList();
+    String line;
+    try {
+      while (null!=(line=reader.readLine())) {
+        line = line.trim();
+        if (line.startsWith("#")) {
+          continue;
+        }
+        // id
+        int k = line.indexOf(":");
+        String id = line.substring(0,k).trim();
+        // qtext
+        String qtext = line.substring(k+1).trim();
+        // we got a topic!
+        HashMap fields = new HashMap();
+        fields.put(name,qtext);
+        //System.out.println("id: "+id+" qtext: "+qtext+"  line: "+line);
+        QualityQuery topic = new QualityQuery(id,fields);
+        res.add(topic);
+      }
+    } finally {
+      reader.close();
+    }
+    // sort result array (by ID) 
+    QualityQuery qq[] = (QualityQuery[]) res.toArray(new QualityQuery[0]);
+    Arrays.sort(qq);
+    return qq;
+  }
+
+}
--- a/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SubmissionReport.java
+++ b/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SubmissionReport.java
@ -35,13 +35,16 @@ public class SubmissionReport {

  private NumberFormat nf;
  private PrintWriter logger;
+  private String name;
  
  /**
   * Constructor for SubmissionReport.
   * @param logger if null, no submission data is created. 
+   * @param name name of this run.
   */
-  public SubmissionReport (PrintWriter logger) {
+  public SubmissionReport (PrintWriter logger, String name) {
    this.logger = logger;
+    this.name = name;
    nf = NumberFormat.getInstance();
    nf.setMaximumFractionDigits(4);
    nf.setMinimumFractionDigits(4);
@ -66,14 +69,21 @@ public class SubmissionReport {
      String docName = xt.docName(searcher,sd[i].doc);
      logger.println(
          qq.getQueryID()       + sep +
-          '0'                   + sep +
+          "Q0"                   + sep +
          format(docName,20)    + sep +
          format(""+i,7)        + sep +
-          nf.format(sd[i].score)
+          nf.format(sd[i].score) + sep +
+          name
          );
    }
  }

+  public void flush() {
+    if (logger!=null) {
+      logger.flush();
+    }
+  }
+  
  private static String padd = "                                    ";
  private String format(String s, int minLen) {
    s = (s==null ? "" : s);
--- a/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/TestQualityRun.java
+++ b/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/TestQualityRun.java
@ -87,8 +87,9 @@ public class TestQualityRun extends TestCase {
    QualityQueryParser qqParser = new SimpleQQParser("title","body");
    QualityBenchmark qrun = new QualityBenchmark(qqs, qqParser, searcher, docNameField);
    
-    SubmissionReport submitLog = DEBUG ? new SubmissionReport(logger) : null;
-    QualityStats stats[] = qrun.execute(maxResults, judge, submitLog, logger);
+    SubmissionReport submitLog = DEBUG ? new SubmissionReport(logger, "TestRun") : null;
+    qrun.setMaxResults(maxResults);
+    QualityStats stats[] = qrun.execute(judge, submitLog, logger);
    
    // --------- verify by the way judgments were altered for this test:
    // for some queries, depending on m = qnum % 8