LUCENE-836: Add support for search quality benchmarking.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@560372 13f79535-47bb-0310-9956-ffa450edef68
2007-07-27 20:24:52 +00:00 · 2007-07-27 20:24:52 +00:00 · 98fa2d898d
parent c1496444b2
commit 98fa2d898d
20 changed files with 2469 additions and 6 deletions
--- a/common-build.xml
+++ b/common-build.xml
@ -284,6 +284,8 @@
    </copy>
  </target>

+  <property name="tests.verbose" value="false"/>
+
  <target name="test" depends="compile-test" description="Runs unit tests">
    <fail unless="junit.present">
      ##################################################################
@ -299,6 +301,10 @@
      <assertions>
        <enable package="org.apache.lucene"/>
      </assertions>
+
+      <!-- allow tests to control debug prints -->
+      <sysproperty key="tests.verbose" value="${tests.verbose}"/>
+
      <!-- TODO: create propertyset for test properties, so each project can have its own set -->
      <sysproperty key="dataDir" file="src/test"/>
      <sysproperty key="tempDir" file="${build.dir}/test"/>
--- a/contrib/benchmark/CHANGES.txt
+++ b/contrib/benchmark/CHANGES.txt
@ -4,6 +4,14 @@ The Benchmark contrib package contains code for benchmarking Lucene in a variety

 $Id:$

+7/27/07
+  LUCENE-836: Add support for search quality benchmarking, running 
+  a set of queries against a searcher, and, optionally produce a submission
+  report, and, if query judgements are available, compute quality measures:
+  recall, precision_at_N, average_precision, MAP. TREC specific Judge (based 
+  on TREC QRels) and TREC Topics reader are included in o.a.l.benchmark.quality.trec
+  but any other format of queries and judgements can be implemented and used.
+  
 7/24/07
  LUCENE-947: Add support for creating and index "one document per
  line" from a large text file, which reduces per-document overhead of
--- a/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/Judge.java
+++ b/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/Judge.java
@ -0,0 +1,53 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.benchmark.quality;
+
+import java.io.PrintWriter;
+
+/**
+ * Judge if a document is relevant for a quality query.
+ */
+public interface Judge {
+
+  /**
+   * Judge if document <code>docName</code> is relevant for the given quality query.
+   * @param docName name of doc tested for relevancy.
+   * @param query tested quality query. 
+   * @return true if relevant, false if not.
+   */
+  public boolean isRelevant(String docName, QualityQuery query);
+
+  /**
+   * Validate that queries and this Judge match each other.
+   * To be perfectly valid, this Judge must have some data for each and every 
+   * input quality query, and must not have any data on any other quality query.  
+   * <b>Note</b>: the quality benchmark run would not fail in case of imperfect
+   * validity, just a warning message would be logged.  
+   * @param qq quality queries to be validated.
+   * @param logger if not null, validation issues are logged.
+   * @return true if perfectly valid, false if not.
+   */
+  public boolean validateData (QualityQuery qq[], PrintWriter logger);
+  
+  /**
+   * Return the maximal recall for the input quality query. 
+   * It is the number of relevant docs this Judge "knows" for the query. 
+   * @param query the query whose maximal recall is needed.
+   */
+  public int maxRecall (QualityQuery query);
+
+}
--- a/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityBenchmark.java
+++ b/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityBenchmark.java
@ -0,0 +1,124 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.benchmark.quality;
+
+import java.io.IOException;
+import java.io.PrintWriter;
+
+import org.apache.lucene.benchmark.quality.utils.DocNameExtractor;
+import org.apache.lucene.benchmark.quality.utils.SubmissionReport;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.Searcher;
+import org.apache.lucene.search.TopDocs;
+
+/**
+ * Main entry point for running a quality benchmark.
+ * <p>
+ * There are two main configurations for running a quality benchmark: <ul>
+ * <li>Against existing judgements.</li>
+ * <li>For submission (e.g. for a contest).</li>
+ * </ul>
+ * The first configuration requires a non null
+ * {@link org.apache.lucene.benchmark.quality.Judge Judge}. 
+ * The second configuration requires a non null 
+ * {@link org.apache.lucene.benchmark.quality.utils.SubmissionReport SubmissionReport}.
+ */
+public class QualityBenchmark {
+
+  /** Quality Queries that this quality benchmark would execute. */
+  protected QualityQuery qualityQueries[];
+  
+  /** Parser for turning QualityQueries into Lucene Queries. */
+  protected QualityQueryParser qqParser;
+  
+  /** Index to be searched. */
+  protected Searcher searcher;
+
+  /** index field to extract doc name for each search result; used for judging the results. */  
+  protected String docNameField;
+
+  /**
+   * Create a QualityBenchmark.
+   * @param qqs quality queries to run.
+   * @param qqParser parser for turning QualityQueries into Lucene Queries. 
+   * @param searcher index to be searched.
+   * @param docNameField name of field containing the document name.
+   *        This allows to extract the doc name for search results,
+   *        and is important for judging the results.  
+   */
+  public QualityBenchmark(QualityQuery qqs[], QualityQueryParser qqParser, 
+      Searcher searcher, String docNameField) {
+    this.qualityQueries = qqs;
+    this.qqParser = qqParser;
+    this.searcher = searcher;
+    this.docNameField = docNameField;
+  }
+
+  /**
+   * Run the quality benchmark.
+   * @param maxResults how many results to collect for each quality query.
+   * @param judge the judge that can tell if a certain result doc is relevant for a certain quality query. 
+   *        If null, no judgements would be made. Usually null for a submission run. 
+   * @param submitRep submission report is created if non null.
+   * @param qualityLog If not null, quality run data would be printed for each query.
+   * @return QualityStats of each quality query that was executed.
+   * @throws Exception if quality benchmark failed to run.
+   */
+  public  QualityStats [] execute(int maxResults, Judge judge, SubmissionReport submitRep, 
+                                  PrintWriter qualityLog) throws Exception {
+    QualityStats stats[] = new QualityStats[qualityQueries.length]; 
+    for (int i=0; i<qualityQueries.length; i++) {
+      QualityQuery qq = qualityQueries[i];
+      // generate query
+      Query q = qqParser.parse(qq);
+      // search with this query 
+      long t1 = System.currentTimeMillis();
+      TopDocs td = searcher.search(q,null,maxResults);
+      long searchTime = System.currentTimeMillis()-t1;
+      //most likely we either submit or judge, but check both 
+      if (judge!=null) {
+        stats[i] = analyzeQueryResults(qq, q, td, judge, qualityLog, searchTime);
+      }
+      if (submitRep!=null) {
+        submitRep.report(qq,td,docNameField,searcher);
+      }
+    } 
+    return stats;
+  }
+  
+  /*
+   * Analyze/judge results for a single quality query; optionally log them.
+   *
+   * qq         the quality query that was run.
+   * q          the Lucene query parsed from qq; used only for logging.
+   * td         search results to be judged.
+   * judge      decides relevance of each result and supplies the max recall for qq.
+   * logger     if not null, the query and its stats are printed to it.
+   * searchTime search time in milliseconds, recorded in the returned stats.
+   *
+   * Returns the stats accumulated over all results in td.
+   */
+  private QualityStats analyzeQueryResults(QualityQuery qq, Query q, TopDocs td, Judge judge, PrintWriter logger, long searchTime) throws IOException {
+    QualityStats stts = new QualityStats(judge.maxRecall(qq),searchTime);
+    ScoreDoc sd[] = td.scoreDocs;
+    // For the first doc name we measure also the construction of the doc name extractor, just in case.
+    long t1 = System.currentTimeMillis();
+    DocNameExtractor xt = new DocNameExtractor(docNameField);
+    for (int i=0; i<sd.length; i++) {
+      String docName = xt.docName(searcher,sd[i].doc);
+      long docNameExtractTime = System.currentTimeMillis() - t1;
+      boolean isRelevant = judge.isRelevant(docName,qq);
+      stts.addResult(i+1,isRelevant, docNameExtractTime);
+      // Restart the clock only after judging and recording, so that this time
+      // is not charged to the extraction of the next doc name.
+      t1 = System.currentTimeMillis();
+    }
+    if (logger!=null) {
+      logger.println(qq.getQueryID()+"  -  "+q);
+      stts.log(qq.getQueryID()+" Stats:",1,logger,"  ");
+    }
+    return stts;
+  }
+
+}
--- a/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityQuery.java
+++ b/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityQuery.java
@ -0,0 +1,87 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.benchmark.quality;
+
+import java.util.Map;
+
+/**
+ * A QualityQuery has an ID and some name-value pairs.
+ * <p> 
+ * The ID allows to map the quality query with its judgements.
+ * <p>
+ * The name-value pairs are used by a 
+ * {@link org.apache.lucene.benchmark.quality.QualityQueryParser}
+ * to create a Lucene {@link org.apache.lucene.search.Query}.
+ * <p>
+ * It is very likely that name-value-pairs would be mapped into fields in a Lucene query,
+ * but it is up to the QualityQueryParser how to map - e.g. all values in a single field, 
+ * or each pair as its own field, etc., - and this of course must match the way the 
+ * searched index was constructed.
+ */
+public class QualityQuery implements Comparable {
+  private String queryID;
+  private Map nameValPairs;
+
+  /**
+   * Create a QualityQuery with given ID and name-value pairs.
+   * @param queryID ID of this quality query.
+   * @param nameValPairs the contents of this quality query.
+   */
+  public QualityQuery(String queryID, Map nameValPairs) {
+    this.queryID = queryID;
+    this.nameValPairs = nameValPairs;
+  }
+  
+  /**
+   * Return all the names of name-value-pairs in this QualityQuery.
+   */
+  public String[] getNames() {
+    return (String[]) nameValPairs.keySet().toArray(new String[0]);
+  }
+
+  /**
+   * Return the value of a certain name-value pair.
+   * @param name the name whose value should be returned. 
+   */
+  public String getValue(String name) {
+    return (String) nameValPairs.get(name);
+  }
+
+  /**
+   * Return the ID of this query.
+   * The ID allows to map the quality query with its judgements.
+   */
+  public String getQueryID() {
+    return queryID;
+  }
+
+  /**
+   * Order quality queries by their IDs, for a nicer sort of input queries before running them.
+   * IDs are compared numerically when both parse as ints, otherwise as strings.
+   * @param o the other QualityQuery to compare with.
+   * @return a negative, zero or positive number if this query's ID is
+   *         smaller than, equal to, or larger than the other query's ID.
+   */
+  public int compareTo(Object o) {
+    QualityQuery other = (QualityQuery) o;
+    try {
+      // compare as ints when both ids are ints
+      int n = Integer.parseInt(queryID);
+      int nOther = Integer.parseInt(other.queryID);
+      // explicit three-way comparison: n - nOther may overflow for ids of opposite sign
+      return n < nOther ? -1 : (n == nOther ? 0 : 1);
+    } catch (NumberFormatException e) {
+      // fall back to string comparison
+      return queryID.compareTo(other.queryID);
+    }
+  }
+  
+}
--- a/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityQueryParser.java
+++ b/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityQueryParser.java
@ -0,0 +1,34 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.benchmark.quality;
+
+import org.apache.lucene.queryParser.ParseException;
+import org.apache.lucene.search.Query;
+
+/**
+ * Parse a QualityQuery into a Lucene query.
+ */
+public interface QualityQueryParser {
+
+  /**
+   * Parse a given QualityQuery into a Lucene query.
+   * @param qq the quality query to be parsed.
+   * @throws ParseException if parsing failed.
+   */
+  public Query parse(QualityQuery qq) throws ParseException;
+  
+}
--- a/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityStats.java
+++ b/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityStats.java
@ -0,0 +1,266 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.benchmark.quality;
+
+import java.io.PrintWriter;
+import java.text.NumberFormat;
+import java.util.ArrayList;
+
+/**
+ * Results of quality benchmark run for a single query or for a set of queries.
+ */
+public class QualityStats {
+
+  /** Number of points for which precision is computed. */
+  public static final int MAX_POINTS = 20;
+  
+  private double maxGoodPoints;
+  private double recall;
+  private double pAt[];
+  private double pReleventSum = 0;
+  private double numPoints = 0;
+  private double numGoodPoints = 0;
+  private long searchTime;
+  private long docNamesExtractTime;
+
+  /**
+   * A certain rank in which a relevant doc was found.
+   */
+  public static class RecallPoint {
+    private int rank;
+    private double recall;
+    private RecallPoint(int rank, double recall) {
+      this.rank = rank;
+      this.recall = recall;
+    }
+    /** Returns the rank: where on the list of returned docs this relevant doc appeared. */
+    public int getRank() {
+      return rank;
+    }
+    /** Returns the recall: how many relevant docs were returned up to this point, inclusive. */
+    public double getRecall() {
+      return recall;
+    }
+  }
+  
+  private ArrayList recallPoints;
+  
+  /**
+   * Construct a QualityStats object with anticipated maximal number of relevant hits. 
+   * @param maxGoodPoints maximal possible relevant hits.
+   */
+  public QualityStats(double maxGoodPoints, long searchTime) {
+    this.maxGoodPoints = maxGoodPoints;
+    this.searchTime = searchTime;
+    this.recallPoints = new ArrayList();
+    pAt = new double[MAX_POINTS+1]; // pAt[0] unused. 
+  }
+
+  /**
+   * Add a (possibly relevant) doc.
+   * @param n rank of the added doc (its ordinal position within the query results).
+   * @param isRelevant true if the added doc is relevant, false otherwise.
+   */
+  public void addResult(int n, boolean isRelevant, long docNameExtractTime) {
+    if (Math.abs(numPoints+1 - n) > 1E-6) {
+      throw new IllegalArgumentException("point "+n+" illegal after "+numPoints+" points!");
+    }
+    if (isRelevant) {
+      numGoodPoints+=1;
+      recallPoints.add(new RecallPoint(n,numGoodPoints));
+    }
+    numPoints = n;
+    double p = numGoodPoints / numPoints;
+    if (isRelevant) {
+      pReleventSum += p;
+    }
+    if (n<pAt.length) {
+      pAt[n] = p;
+    }
+    recall = maxGoodPoints<=0 ? p : numGoodPoints/maxGoodPoints;
+    docNamesExtractTime += docNameExtractTime;
+  }
+
+  /**
+   * Return the precision at rank n:
+   * |{relevant hits within first <code>n</code> hits}| / <code>n</code>.
+   * @param n requested precision point, must be at least 1 and at most {@link #MAX_POINTS}. 
+   */
+  public double getPrecisionAt(int n) {
+    if (n<1 || n>MAX_POINTS) {
+      throw new IllegalArgumentException("n="+n+" - but it must be in [1,"+MAX_POINTS+"] range!"); 
+    }
+    if (n>numPoints) {
+      return (numPoints * pAt[(int)numPoints])/n;
+    }
+    return pAt[n];
+  }
+
+  /**
+   * Return the average precision at recall points: sum of precision at recall points / maxGoodPoints.
+   */
+  public double getAvp() {
+    return maxGoodPoints==0 ? 0 : pReleventSum/maxGoodPoints;
+  }
+  
+  /**
+   * Return the recall: |{relevant hits found}| / |{relevant hits existing}|.
+   */
+  public double getRecall() {
+    return recall;
+  }
+
+  /**
+   * Log information on this QualityStats object.
+   * @param title optional title line printed before the stats; skipped if null or blank.
+   * @param paddLines number of empty lines printed before and after the stats.
+   * @param logger Logger.
+   * @param prefix prefix before each log line; null is treated as an empty prefix.
+   */
+  public void log(String title, int paddLines, PrintWriter logger, String prefix) {
+    for (int i=0; i<paddLines; i++) {  
+      logger.println();
+    }
+    if (title!=null && title.trim().length()>0) {
+      logger.println(title);
+    }
+    prefix = prefix==null ? "" : prefix;
+    // Fixed locale: the alignment code below looks for '.' as the decimal separator,
+    // which the default locale does not guarantee.
+    NumberFormat nf = NumberFormat.getInstance(java.util.Locale.US);
+    nf.setMaximumFractionDigits(3);
+    nf.setMinimumFractionDigits(3);
+    nf.setGroupingUsed(true);
+    int M = 19; // minimal width of the label column
+    logger.println(prefix+format("Search Seconds: ",M)+
+        fracFormat(nf.format((double)searchTime/1000)));
+    logger.println(prefix+format("DocName Seconds: ",M)+
+        fracFormat(nf.format((double)docNamesExtractTime/1000)));
+    logger.println(prefix+format("Num Points: ",M)+
+        fracFormat(nf.format(numPoints)));
+    logger.println(prefix+format("Num Good Points: ",M)+
+        fracFormat(nf.format(numGoodPoints)));
+    logger.println(prefix+format("Max Good Points: ",M)+
+        fracFormat(nf.format(maxGoodPoints)));
+    logger.println(prefix+format("Average Precision: ",M)+
+        fracFormat(nf.format(getAvp())));
+    logger.println(prefix+format("Recall: ",M)+
+        fracFormat(nf.format(getRecall())));
+    // NOTE(review): the bound i<numPoints leaves out the precision at the last
+    // collected rank when fewer than MAX_POINTS results exist - confirm this is intended.
+    for (int i=1; i<(int)numPoints && i<pAt.length; i++) {
+      logger.println(prefix+format("Precision At "+i+": ",M)+
+          fracFormat(nf.format(getPrecisionAt(i))));
+    }
+    for (int i=0; i<paddLines; i++) {  
+      logger.println();
+    }
+  }
+
+  private static String padd = "                                    ";
+  private String format(String s, int minLen) {
+    s = (s==null ? "" : s);
+    int n = Math.max(minLen,s.length());
+    return (s+padd).substring(0,n);
+  }
+  /* Right-align the integer part of a formatted number to at least 6 chars;
+   * the fraction part (from the '.' on) is appended unchanged. */
+  private String fracFormat(String frac) {
+    int k = frac.indexOf('.');
+    if (k<0) {
+      // no decimal point (e.g. a locale using another separator):
+      // treat the whole string as the integer part rather than fail on substring(0,-1).
+      k = frac.length();
+    }
+    String s1 = padd+frac.substring(0,k);
+    int n = Math.max(k,6);
+    s1 = s1.substring(s1.length()-n);
+    return s1 + frac.substring(k);
+  }
+  
+  /**
+   * Create a QualityStats object that is the average of the input QualityStats objects. 
+   * Times are averaged over all input stats; all other measures are averaged only
+   * over "good" stats, i.e. those with a positive number of max good points.
+   * If there are no such stats the non-time measures are left at zero.
+   * @param stats array of input stats to be averaged.
+   * @return an average over the input stats.
+   */
+  public static QualityStats average(QualityStats[] stats) {
+    QualityStats avg = new QualityStats(0,0);
+    int m = 0; // queries with positive judgements
+    // aggregate
+    for (int i=0; i<stats.length; i++) {
+      avg.searchTime += stats[i].searchTime;
+      avg.docNamesExtractTime += stats[i].docNamesExtractTime;
+      if (stats[i].maxGoodPoints>0) {
+        m++;
+        avg.numGoodPoints += stats[i].numGoodPoints;
+        avg.numPoints += stats[i].numPoints;
+        avg.pReleventSum += stats[i].getAvp();
+        avg.recall += stats[i].recall;
+        avg.maxGoodPoints += stats[i].maxGoodPoints;
+        for (int j=1; j<avg.pAt.length; j++) {
+          avg.pAt[j] += stats[i].getPrecisionAt(j);
+        }
+      }
+    }
+    assert m>0 : "Fishy: no \"good\" queries!";
+    // take average: times go by all queries, other measures go by "good" queries only.
+    if (stats.length>0) { // avoid an integer division by zero on empty input
+      avg.searchTime /= stats.length;
+      avg.docNamesExtractTime /= stats.length;
+    }
+    if (m>0) { // with assertions disabled, dividing by m==0 would turn all measures into NaN
+      avg.numGoodPoints /= m;
+      avg.numPoints /= m;
+      avg.recall /= m;
+      avg.maxGoodPoints /= m;
+      for (int j=1; j<avg.pAt.length; j++) {
+        avg.pAt[j] /= m;
+      }
+      avg.pReleventSum /= m;                 // this is actually avgp now 
+      avg.pReleventSum *= avg.maxGoodPoints; // so that getAvp() would be correct
+    }
+    
+    return avg;
+  }
+
+  /**
+   * Returns the time it took to extract doc names for judging the measured query, in milliseconds.
+   */
+  public long getDocNamesExtractTime() {
+    return docNamesExtractTime;
+  }
+
+  /**
+   * Returns the maximal number of good points.
+   * This is the number of relevant docs known by the judge for the measured query.
+   */
+  public double getMaxGoodPoints() {
+    return maxGoodPoints;
+  }
+
+  /**
+   * Returns the number of good points (only relevant points).
+   */
+  public double getNumGoodPoints() {
+    return numGoodPoints;
+  }
+
+  /**
+   * Returns the number of points (both relevant and irrelevant points).
+   */
+  public double getNumPoints() {
+    return numPoints;
+  }
+
+  /**
+   * Returns the recallPoints.
+   */
+  public RecallPoint [] getRecallPoints() {
+    return (RecallPoint[]) recallPoints.toArray(new RecallPoint[0]);
+  }
+
+  /**
+   * Returns the search time in milliseconds for the measured query.
+   */
+  public long getSearchTime() {
+    return searchTime;
+  }
+
+}
--- a/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/package.html
+++ b/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/package.html
@ -0,0 +1,65 @@
+<html>
+<body>
+<h2>Search Quality Benchmarking.</h2>
+<p>
+This package allows to benchmark search quality of a Lucene application.
+<p>
+In order to use this package you should provide:
+<ul>
+  <li>A <a href="../../search/Searcher.html">searcher</a>.</li>
+  <li><a href="QualityQuery.html">Quality queries</a>.</li>
+  <li><a href="Judge.html">Judging object</a>.</li>
+  <li><a href="utils/SubmissionReport.html">Reporting object</a>.</li>
+</ul>
+<p>
+For benchmarking TREC collections with TREC QRels, take a look at the 
+<a href="trec/package-summary.html">trec package</a>.
+<p>
+Here is a sample code used to run the TREC 2006 queries 701-850 on the .Gov2 collection:
+
+<pre>
+    File topicsFile = new File("topics-701-850.txt");
+    File qrelsFile = new File("qrels-701-850.txt");
+    Searcher searcher = new IndexSearcher("index");
+
+    int maxResults = 1000;
+    String docNameField = "docname"; 
+    
+    PrintWriter logger = new PrintWriter(System.out,true); 
+
+    // use trec utilities to read trec topics into quality queries
+    TrecTopicsReader qReader = new TrecTopicsReader();
+    QualityQuery qqs[] = qReader.readQueries(new BufferedReader(new FileReader(topicsFile)));
+    
+    // prepare judge, with trec utilities that read from a QRels file
+    Judge judge = new TrecJudge(new BufferedReader(new FileReader(qrelsFile)));
+    
+    // validate topics &amp; judgments match each other
+    judge.validateData(qqs, logger);
+    
+    // set the parsing of quality queries into Lucene queries.
+    QualityQueryParser qqParser = new SimpleQQParser("title", "body");
+    
+    // run the benchmark
+    QualityBenchmark qrun = new QualityBenchmark(qqs, qqParser, searcher, docNameField);
+    SubmissionReport submitLog = null;
+    QualityStats stats[] = qrun.execute(maxResults, judge, submitLog, logger);
+    
+    // print an average summary of the results
+    QualityStats avg = QualityStats.average(stats);
+    avg.log("SUMMARY",2,logger, "  ");
+</pre>
+
+<p>
+Some immediate ways to modify this program to your needs are:
+<ul>
+  <li>To run on different formats of queries and judgements provide your own 
+      <a href="Judge.html">Judge</a> and 
+      <a href="QualityQuery.html">Quality queries</a>.</li>
+  <li>Create sophisticated Lucene queries by supplying a different 
+  <a href="QualityQueryParser.html">Quality query parser</a>.</li>
+</ul>
+
+</body>
+
+</html>
--- a/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/TrecJudge.java
+++ b/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/TrecJudge.java
@ -0,0 +1,158 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.benchmark.quality.trec;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.StringTokenizer;
+
+import org.apache.lucene.benchmark.quality.Judge;
+import org.apache.lucene.benchmark.quality.QualityQuery;
+
+/**
+ * Judge if given document is relevant to given quality query, based on Trec format for judgements.
+ */
+public class TrecJudge implements Judge {
+
+  HashMap judgements;
+  
+  /**
+   * Constructor from a reader.
+   * <p>
+   * Expected input format:
+   * <pre>
+   *     qnum  0   doc-name     is-relevant
+   * </pre> 
+   * Two sample lines:
+   * <pre> 
+   *     19    0   doc303       1
+   *     19    0   doc7295      0
+   * </pre> 
+   * @param reader where judgments are read from.
+   * @throws IOException 
+   */
+  public TrecJudge (BufferedReader reader) throws IOException {
+    judgements = new HashMap();
+    QRelJudgement curr = null;
+    String zero = "0";
+    String line;
+    
+    try {
+      while (null!=(line=reader.readLine())) {
+        line = line.trim();
+        if (line.length()==0 || '#'==line.charAt(0)) {
+          continue;
+        }
+        StringTokenizer st = new StringTokenizer(line);
+        String queryID = st.nextToken();
+        st.nextToken();
+        String docName = st.nextToken();
+        boolean relevant = !zero.equals(st.nextToken());
+        assert !st.hasMoreTokens() : "wrong format: "+line+"  next: "+st.nextToken();
+        if (relevant) { // only keep relevant docs
+          if (curr==null || !curr.queryID.equals(queryID)) {
+            curr = (QRelJudgement)judgements.get(queryID);
+            if (curr==null) {
+              curr = new QRelJudgement(queryID);
+              judgements.put(queryID,curr);
+            }
+          }
+          curr.addRelevandDoc(docName);
+        }
+      }
+    } finally {
+      reader.close();
+    }
+  }
+  
+  // inherit javadocs
+  public boolean isRelevant(String docName, QualityQuery query) {
+    QRelJudgement qrj = (QRelJudgement) judgements.get(query.getQueryID());
+    return qrj!=null && qrj.isRelevant(docName);
+  }
+
+  /** single Judgement of a trec quality query */
+  private static class QRelJudgement {
+    private String queryID;
+    private HashMap relevantDocs;
+    
+    QRelJudgement(String queryID) {
+      this.queryID = queryID;
+      relevantDocs = new HashMap();
+    }
+    
+    public void addRelevandDoc(String docName) {
+      relevantDocs.put(docName,docName);
+    }
+
+    boolean isRelevant(String docName) {
+      return relevantDocs.containsKey(docName);
+    }
+
+    public int maxRecall() {
+      return relevantDocs.size();
+    }
+  }
+
+  // inherit javadocs
+  public boolean validateData(QualityQuery[] qq, PrintWriter logger) {
+    HashMap missingQueries = (HashMap) judgements.clone();
+    ArrayList missingJudgements = new ArrayList();
+    for (int i=0; i<qq.length; i++) {
+      String id = qq[i].getQueryID();
+      if (missingQueries.containsKey(id)) {
+        missingQueries.remove(id);
+      } else {
+        missingJudgements.add(id);
+      }
+    }
+    boolean isValid = true;
+    if (missingJudgements.size()>0) {
+      isValid = false;
+      if (logger!=null) {
+        logger.println("WARNING: "+missingJudgements.size()+" queries have no judgments! - ");
+        for (int i=0; i<missingJudgements.size(); i++) {
+          logger.println("   "+(String)missingJudgements.get(i));
+        }
+      }
+    }
+    if (missingQueries.size()>0) {
+      isValid = false;
+      if (logger!=null) {
+        logger.println("WARNING: "+missingQueries.size()+" judgments match no query! - ");
+        for (Iterator it = missingQueries.keySet().iterator(); it.hasNext();) {
+          String id = (String) it.next();
+          logger.println("   "+id);
+        }
+      }
+    }
+    return isValid;
+  }
+
+  // inherit javadocs
+  public int maxRecall(QualityQuery query) {
+    QRelJudgement qrj = (QRelJudgement) judgements.get(query.getQueryID());
+    if (qrj!=null) {
+      return qrj.maxRecall();
+    }
+    return 0;
+  }
+}
--- a/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/TrecTopicsReader.java
+++ b/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/TrecTopicsReader.java
@ -0,0 +1,123 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.benchmark.quality.trec;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+
+import org.apache.lucene.benchmark.quality.QualityQuery;
+
+/**
+ * Read TREC topics.
+ * <p>
+ * Expects this topic format -
+ * <pre>
+ *   &lt;top&gt;
+ *   &lt;num&gt; Number: nnn
+ *     
+ *   &lt;title&gt; title of the topic
+ *     
+ *   &lt;desc&gt; Description:
+ *   description of the topic
+ *     
+ *   &lt;narr&gt; Narrative:
+ *   "story" composed by assessors.
+ *    
+ *   &lt;/top&gt;
+ * </pre>
+ * A tag is recognized only when it is at the very start of a line.
+ * While looking for the next expected tag, all other lines are skipped, 
+ * so comment lines starting with '#' that appear outside the description 
+ * are ignored. Every line between the &lt;desc&gt; line and the &lt;narr&gt; 
+ * line is taken as the description text.
+ */
+public class TrecTopicsReader {
+
+  private static final String newline = System.getProperty("line.separator");
+  
+  /**
+   *  Constructor for Trec's TopicsReader
+   */
+  public TrecTopicsReader() {
+    super();
+  }
+
+  /**
+   * Read quality queries from trec format topics file.
+   * Each topic becomes a QualityQuery with the name-value pairs 
+   * "title" and "description"; the narrative is skipped.
+   * The reader is always closed before this method returns.
+   * @param reader where queries are read from.
+   * @return the result quality queries, sorted with {@link Arrays#sort(Object[])}.
+   * @throws IOException if cannot read the queries, or if the input ends 
+   *   in the middle of a topic, before its &lt;narr&gt; line was found.
+   */
+  public QualityQuery[] readQueries(BufferedReader reader) throws IOException {
+    ArrayList res = new ArrayList();
+    try {
+      // no further <top> line means there are no more topics
+      while (null!=read(reader,"<top>",null,false,false)) {
+        HashMap fields = new HashMap();
+        // id: the text after the ':' of the <num> line
+        StringBuffer sb = readRequired(reader,"<num>",true,false);
+        int k = sb.indexOf(":");
+        String id = sb.substring(k+1).trim();
+        // title: the text after the '>' closing the <title> tag
+        sb = readRequired(reader,"<title>",true,false);
+        k = sb.indexOf(">");
+        String title = sb.substring(k+1).trim();
+        // description: all lines after the <desc> line, up to the <narr> line
+        readRequired(reader,"<desc>",false,false);
+        sb = readRequired(reader,"<narr>",false,true);
+        String description = sb.toString().trim();
+        // we got a topic!
+        fields.put("title",title);
+        fields.put("description",description);
+        QualityQuery topic = new QualityQuery(id,fields);
+        res.add(topic);
+        // skip narrative, get to end of doc; a missing </top> at the 
+        // very end of the input is tolerated, the topic was already added.
+        read(reader,"</top>",null,false,false);
+      }
+    } finally {
+      reader.close();
+    }
+    // sort result array (by ID) 
+    QualityQuery qq[] = (QualityQuery[]) res.toArray(new QualityQuery[0]);
+    Arrays.sort(qq);
+    return qq;
+  }
+
+  // same as read(), but a missing line is reported as an IOException 
+  // instead of a null that the caller would trip over.
+  private StringBuffer readRequired (BufferedReader reader, String prefix, boolean collectMatchLine, boolean collectAll) throws IOException {
+    StringBuffer sb = read(reader,prefix,null,collectMatchLine,collectAll);
+    if (sb==null) {
+      throw new IOException("Malformed topics input: reached end of input while looking for a line starting with "+prefix);
+    }
+    return sb;
+  }
+
+  // read until finding a line that starts with the specified prefix.
+  // collectAll: lines preceding the matching line are appended to sb.
+  // collectMatchLine: the matching line itself is appended to sb.
+  // returns null if the input ended before a matching line was found.
+  private StringBuffer read (BufferedReader reader, String prefix, StringBuffer sb, boolean collectMatchLine, boolean collectAll) throws IOException {
+    sb = (sb==null ? new StringBuffer() : sb);
+    String sep = "";
+    while (true) {
+      String line = reader.readLine();
+      if (line==null) {
+        return null;
+      }
+      if (line.startsWith(prefix)) {
+        if (collectMatchLine) {
+          sb.append(sep).append(line);
+        }
+        break;
+      }
+      if (collectAll) {
+        sb.append(sep).append(line);
+        sep = newline;
+      }
+    }
+    return sb;
+  }
+}
--- a/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/package.html
+++ b/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/package.html
@ -0,0 +1,6 @@
+<html>
+<body>
+Utilities for Trec related quality benchmarking, feeding from Trec Topics and QRels inputs.
+</body>
+
+</html>
--- a/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/DocNameExtractor.java
+++ b/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/DocNameExtractor.java
@ -0,0 +1,59 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.benchmark.quality.utils;
+
+import java.io.IOException;
+
+import org.apache.lucene.document.FieldSelector;
+import org.apache.lucene.document.FieldSelectorResult;
+import org.apache.lucene.search.Searcher;
+
+/**
+ * Utility for fetching the name of a document from an index, 
+ * where the name is kept in a stored field.
+ */
+public class DocNameExtractor {
+
+  private String docNameField;
+  private FieldSelector fldSel;
+  
+  /**
+   * Constructor for DocNameExtractor.
+   * @param docNameField name of the stored field containing the doc name. 
+   */
+  public DocNameExtractor (final String docNameField) {
+    // selector that asks for the doc name field only, 
+    // so that the other fields of the doc are not loaded.
+    fldSel = new FieldSelector() {
+      public FieldSelectorResult accept(String fieldName) {
+        if (fieldName.equals(docNameField)) {
+          return FieldSelectorResult.LOAD_AND_BREAK;
+        }
+        return FieldSelectorResult.NO_LOAD;
+      }
+    };
+    this.docNameField = docNameField;
+  }
+  
+  /**
+   * Extract the name of the input doc from the index.
+   * @param searcher access to the index.
+   * @param docid ID of doc whose name is needed.
+   * @return the value of the doc name field of that doc, as returned by the searcher.
+   * @throws IOException if cannot extract the doc name from the index.
+   */
+  public String docName(Searcher searcher, int docid) throws IOException {
+    return searcher.doc(docid,fldSel).get(docNameField);
+  }
+  
+}
--- a/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/QualityQueriesFinder.java
+++ b/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/QualityQueriesFinder.java
@ -0,0 +1,135 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.benchmark.quality.utils;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.util.PriorityQueue;
+
+/**
+ * Suggest Quality queries based on an index contents.
+ * Utility class, used for making quality test benchmarks.
+ * <p>
+ * Queries are composed of terms of a single field whose document 
+ * frequency is below 10% of the index maxDoc.
+ */
+public class QualityQueriesFinder {
+
+  private static final String newline = System.getProperty("line.separator");
+  private Directory dir;
+  
+  /**
+   * Constructor over a directory containing the index.
+   * @param dir directory containing the index we search for the quality test. 
+   */
+  private QualityQueriesFinder(Directory dir) {
+    this.dir = dir;
+  }
+
+  /**
+   * Print 20 suggested queries over the "body" field, formatted as TREC topics.
+   * @param args {index-dir}
+   * @throws IOException  if cannot access the index.
+   */
+  public static void main(String[] args) throws IOException {
+    if (args.length<1) {
+      System.err.println("Usage: java QualityQueriesFinder <index-dir>");
+      System.exit(1);
+    }
+    QualityQueriesFinder qqf = new QualityQueriesFinder(FSDirectory.getDirectory(new File(args[0])));
+    String q[] = qqf.bestQueries("body",20);
+    for (int i=0; i<q.length; i++) {
+      System.out.println(newline+formatQueryAsTrecTopic(i,q[i],null,null));
+    }
+  }
+
+  // compose up to numQueries queries of 4 words each, out of the best terms of the field.
+  private String [] bestQueries(String field,int numQueries) throws IOException {
+    // use the requested field (this used to be hard-coded to "body")
+    String words[] = bestTerms(field,4*numQueries);
+    int n = words.length;
+    int m = n/4;
+    String res[] = new String[m];
+    for (int i=0; i<res.length; i++) {
+      // words are ordered by increasing df: take two from the low end and two from the high end
+      res[i] = words[i] + " " + words[m+i]+ "  " + words[n-1-m-i]  + " " + words[n-1-i];
+    }
+    return res;
+  }
+  
+  // format a query as a TREC topic; null parts are written as empty text.
+  private static String formatQueryAsTrecTopic (int qnum, String title, String description, String narrative) {
+    return 
+      "<top>" + newline +
+      "<num> Number: " + qnum             + newline + newline + 
+      "<title> " + (title==null?"":title) + newline + newline + 
+      "<desc> Description:"               + newline +
+      (description==null?"":description)  + newline + newline +
+      "<narr> Narrative:"                 + newline +
+      (narrative==null?"":narrative)      + newline + newline +
+      "</top>";
+  }
+  
+  // collect at most numTerms terms of the field, those with the highest df 
+  // among the terms whose df is below the threshold. Result is ordered by increasing df.
+  private String [] bestTerms(String field,int numTerms) throws IOException {
+    PriorityQueue pq = new TermsDfQueue(numTerms);
+    IndexReader ir = IndexReader.open(dir);
+    try {
+      int threshold = ir.maxDoc() / 10; // ignore words too common.
+      TermEnum terms = ir.terms(new Term(field,""));
+      try {
+        // terms(Term) returns an enum already positioned on its first term, 
+        // so that term must be examined before the first call to next().
+        do {
+          Term term = terms.term();
+          if (term==null || !field.equals(term.field())) {
+            break; // no more terms, or moved on to terms of another field
+          }
+          int df = terms.docFreq();
+          if (df<threshold) {
+            pq.insert(new TermDf(term.text(),df));
+          }
+        } while (terms.next());
+      } finally {
+        terms.close();
+      }
+    } finally {
+      ir.close();
+    }
+    String res[] = new String[pq.size()];
+    int i = 0;
+    while (pq.size()>0) {
+      TermDf tdf = (TermDf) pq.pop(); 
+      res[i++] = tdf.word;
+      System.out.println(i+".   word:  "+tdf.df+"   "+tdf.word);
+    }
+    return res;
+  }
+
+  // a term text with its document frequency
+  private static class TermDf {
+    String word;
+    int df;
+    TermDf (String word, int freq) {
+      this.word = word;
+      this.df = freq;
+    }
+  }
+  
+  // queue of TermDf entries, ordered by df
+  private static class TermsDfQueue extends PriorityQueue {
+    TermsDfQueue (int maxSize) {
+      initialize(maxSize);
+    }
+    protected boolean lessThan(Object a, Object b) {
+      TermDf tf1 = (TermDf) a;
+      TermDf tf2 = (TermDf) b;
+      return tf1.df < tf2.df;
+    }
+  }
+  
+}
--- a/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SimpleQQParser.java
+++ b/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SimpleQQParser.java
@ -0,0 +1,58 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.benchmark.quality.utils;
+
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.benchmark.quality.QualityQuery;
+import org.apache.lucene.benchmark.quality.QualityQueryParser;
+import org.apache.lucene.queryParser.ParseException;
+import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.search.Query;
+
+/**
+ * Simplistic quality query parser. A Lucene query is created by passing 
+ * the value of the specified QualityQuery name-value pair into 
+ * a Lucene's QueryParser using StandardAnalyzer.
+ * A separate QueryParser is kept for each calling thread. 
+ */
+public class SimpleQQParser implements QualityQueryParser {
+
+  private String qqName;
+  private String indexField;
+  ThreadLocal queryParser = new ThreadLocal();
+
+  /**
+   * Constructor of a simple qq parser.
+   * @param qqName name-value pair of quality query to use for creating the query
+   * @param indexField corresponding index field  
+   */
+  public SimpleQQParser(String qqName, String indexField) {
+    this.indexField = indexField;
+    this.qqName = qqName;
+  }
+
+  /* (non-Javadoc)
+   * @see org.apache.lucene.benchmark.quality.QualityQueryParser#parse(org.apache.lucene.benchmark.quality.QualityQuery)
+   */
+  public Query parse(QualityQuery qq) throws ParseException {
+    QueryParser parser = parserOfCurrentThread();
+    return parser.parse(qq.getValue(qqName));
+  }
+
+  // the parser kept for the calling thread, created on first use by that thread.
+  private QueryParser parserOfCurrentThread() {
+    QueryParser parser = (QueryParser) queryParser.get();
+    if (parser!=null) {
+      return parser;
+    }
+    parser = new QueryParser(indexField, new StandardAnalyzer());
+    queryParser.set(parser);
+    return parser;
+  }
+
+}
--- a/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SubmissionReport.java
+++ b/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SubmissionReport.java
@ -0,0 +1,83 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.benchmark.quality.utils;
+
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.text.NumberFormat;
+
+import org.apache.lucene.benchmark.quality.QualityQuery;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.Searcher;
+import org.apache.lucene.search.TopDocs;
+
+/**
+ * Create a log ready for submission.
+ * Extend this class and override
+ * {@link #report(QualityQuery, TopDocs, String, Searcher)}
+ * to create different reports. 
+ */
+public class SubmissionReport {
+
+  private NumberFormat nf;
+  private PrintWriter logger;
+  
+  /**
+   * Constructor for SubmissionReport.
+   * @param logger if null, no submission data is created. 
+   */
+  public SubmissionReport (PrintWriter logger) {
+    this.logger = logger;
+    // Use a fixed locale, so that scores are always written with a '.' decimal 
+    // separator, regardless of the default locale of the machine running the test.
+    nf = NumberFormat.getInstance(java.util.Locale.US);
+    nf.setMaximumFractionDigits(4);
+    nf.setMinimumFractionDigits(4);
+  }
+  
+  /**
+   * Report a search result for a certain quality query.
+   * One line is written for each result: 
+   * query ID, the constant 0, doc name, rank (starting at 0) and score.
+   * @param qq quality query for which the results are reported.
+   * @param td search results for the query.
+   * @param docNameField stored field used for fetching the result doc name.  
+   * @param searcher index access for fetching doc name.
+   * @throws IOException in case of a problem.
+   */
+  public void report(QualityQuery qq, TopDocs td, String docNameField, Searcher searcher) throws IOException {
+    if (logger==null) {
+      return;
+    }
+    ScoreDoc sd[] = td.scoreDocs;
+    String sep = " \t ";
+    DocNameExtractor xt = new DocNameExtractor(docNameField);
+    for (int i=0; i<sd.length; i++) {
+      String docName = xt.docName(searcher,sd[i].doc);
+      logger.println(
+          qq.getQueryID()       + sep +
+          '0'                   + sep +
+          format(docName,20)    + sep +
+          format(""+i,7)        + sep +
+          nf.format(sd[i].score)
+          );
+    }
+  }
+
+  // right-pad s with blanks up to minLen chars; null is treated as an empty string,
+  // longer strings are returned unchanged. Works for any minLen.
+  private String format(String s, int minLen) {
+    StringBuffer sb = new StringBuffer(s==null ? "" : s);
+    while (sb.length()<minLen) {
+      sb.append(' ');
+    }
+    return sb.toString();
+  }
+}
--- a/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/package.html
+++ b/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/package.html
@ -0,0 +1,6 @@
+<html>
+<body>
+Miscellaneous utilities for search quality benchmarking: query parsing, submission reports.
+</body>
+
+</html>
--- a/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java
+++ b/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java
@ -23,6 +23,9 @@ import java.io.FileReader;
 import java.io.BufferedReader;

 import org.apache.lucene.benchmark.byTask.Benchmark;
+import org.apache.lucene.benchmark.byTask.feeds.DocData;
+import org.apache.lucene.benchmark.byTask.feeds.NoMoreDataException;
+import org.apache.lucene.benchmark.byTask.feeds.ReutersDocMaker;
 import org.apache.lucene.benchmark.byTask.tasks.CountingSearchTestTask;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
@ -135,8 +138,8 @@ public class TestPerfTasksLogic extends TestCase {
    // 1. alg definition (required in every "logic" test)
    String algLines[] = {
        "# ----- properties ",
-        "doc.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersDocMaker",
-        "doc.add.log.step=2697",
+        "doc.maker="+Reuters20DocMaker.class.getName(),
+        "doc.add.log.step=3",
        "doc.term.vector=false",
        "doc.maker.forever=false",
        "directory=FSDirectory",
@ -153,7 +156,7 @@ public class TestPerfTasksLogic extends TestCase {

    // 3. test number of docs in the index
    IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory());
-    int ndocsExpected = 21578; // that's how many docs there are in the Reuters collecton.
+    int ndocsExpected = 20; // Reuters20DocMaker exhausts after 20 docs.
    assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
    ir.close();
  }
@ -221,7 +224,7 @@ public class TestPerfTasksLogic extends TestCase {
  }
  
  // create the benchmark and execute it. 
-  private Benchmark execBenchmark(String[] algLines) throws Exception {
+  public static Benchmark execBenchmark(String[] algLines) throws Exception {
    String algText = algLinesToText(algLines);
    logTstLogic(algText);
    Benchmark benchmark = new Benchmark(new StringReader(algText));
@ -230,7 +233,7 @@ public class TestPerfTasksLogic extends TestCase {
  }
  
  // catenate alg lines to make the alg text
-  private String algLinesToText(String[] algLines) {
+  private static String algLinesToText(String[] algLines) {
    String indent = "  ";
    StringBuffer sb = new StringBuffer();
    for (int i = 0; i < propLines.length; i++) {
@ -242,11 +245,22 @@ public class TestPerfTasksLogic extends TestCase {
    return sb.toString();
  }

-  private void logTstLogic (String txt) {
+  private static void logTstLogic (String txt) {
    if (!DEBUG) 
      return;
    System.out.println("Test logic of:");
    System.out.println(txt);
  }

+  /** use reuters and the exhaust mechanism, but to be faster, add 20 docs only... */
+  public static class Reuters20DocMaker extends ReutersDocMaker {
+    private int nDocs=0;
+    protected DocData getNextDocData() throws Exception {
+      if (nDocs>=20 && !forever) {
+        throw new NoMoreDataException();
+      }
+      nDocs++;
+      return super.getNextDocData();
+    }
+  }
 }
--- a/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/TestQualityRun.java
+++ b/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/TestQualityRun.java
@ -0,0 +1,174 @@
+package org.apache.lucene.benchmark.quality;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.PrintWriter;
+
+import org.apache.lucene.benchmark.byTask.TestPerfTasksLogic;
+import org.apache.lucene.benchmark.byTask.feeds.ReutersDocMaker;
+import org.apache.lucene.benchmark.quality.Judge;
+import org.apache.lucene.benchmark.quality.QualityQuery;
+import org.apache.lucene.benchmark.quality.QualityQueryParser;
+import org.apache.lucene.benchmark.quality.QualityBenchmark;
+import org.apache.lucene.benchmark.quality.trec.TrecJudge;
+import org.apache.lucene.benchmark.quality.trec.TrecTopicsReader;
+import org.apache.lucene.benchmark.quality.utils.SimpleQQParser;
+import org.apache.lucene.benchmark.quality.utils.SubmissionReport;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.store.FSDirectory;
+
+import junit.framework.TestCase;
+
+/**
+ * Test that quality run does its job.
+ * <p>
+ * Indexes the Reuters collection, runs the test topics against that index, 
+ * and verifies the quality statistics computed with the test judgments.
+ */
+public class TestQualityRun extends TestCase {
+
+  private static boolean DEBUG = Boolean.getBoolean("tests.verbose");
+  
+  /**
+   * @param name name of the test case to run.
+   */
+  public TestQualityRun(String name) {
+    super(name);
+  }
+
+  public void testTrecQuality() throws Exception {
+    // first create the complete reuters index
+    createReutersIndex();
+    
+    File workDir = new File(System.getProperty("benchmark.work.dir","work"));
+    assertTrue("Bad workDir: "+workDir, workDir.exists()&& workDir.isDirectory());
+
+    int maxResults = 1000;
+    String docNameField = "docid"; 
+    
+    PrintWriter logger = DEBUG ? new PrintWriter(System.out,true) : null;
+
+    // <tests src dir> for topics/qrels files - src/test/org/apache/lucene/benchmark/quality
+    File srcTestDir = new File(new File(new File(new File(new File(
+      new File(new File(workDir.getAbsoluteFile().getParentFile(),
+        "src"),"test"),"org"),"apache"),"lucene"),"benchmark"),"quality");
+    
+    // prepare topics
+    File topicsFile = new File(srcTestDir, "trecTopics.txt");
+    assertTrue("Bad topicsFile: "+topicsFile, topicsFile.exists()&& topicsFile.isFile());
+    TrecTopicsReader qReader = new TrecTopicsReader();
+    QualityQuery qqs[] = qReader.readQueries(new BufferedReader(new FileReader(topicsFile)));
+    
+    // prepare judge
+    File qrelsFile = new File(srcTestDir, "trecQRels.txt");
+    assertTrue("Bad qrelsFile: "+qrelsFile, qrelsFile.exists()&& qrelsFile.isFile());
+    Judge judge = new TrecJudge(new BufferedReader(new FileReader(qrelsFile)));
+    
+    // validate topics & judgments match each other
+    judge.validateData(qqs, logger);
+    
+    // run the queries; the searcher is needed only for the run itself, 
+    // so it is closed as soon as the run ends, also when the run fails.
+    QualityStats stats[];
+    IndexSearcher searcher = new IndexSearcher(FSDirectory.getDirectory(new File(workDir,"index")));
+    try {
+      QualityQueryParser qqParser = new SimpleQQParser("title","body");
+      QualityBenchmark qrun = new QualityBenchmark(qqs, qqParser, searcher, docNameField);
+    
+      SubmissionReport submitLog = DEBUG ? new SubmissionReport(logger) : null;
+      stats = qrun.execute(maxResults, judge, submitLog, logger);
+    } finally {
+      searcher.close();
+    }
+    
+    // --------- verify by the way judgments were altered for this test:
+    // for some queries, depending on m = qnum % 8
+    // m==0: avg_precision and recall are hurt, by marking fake docs as relevant
+    // m==1: precision_at_n and avg_precision are hurt, by unmarking relevant docs
+    // m==2: all of avg_precision, precision_at_n and recall are hurt.
+    // m>=3: these queries remain perfect
+    for (int i = 0; i < stats.length; i++) {
+      QualityStats s = stats[i];
+      switch (i%8) {
+
+      case 0:
+        assertTrue("avg-p should be hurt: "+s.getAvp(), 1.0 > s.getAvp());
+        assertTrue("recall should be hurt: "+s.getRecall(), 1.0 > s.getRecall());
+        for (int j = 1; j <= QualityStats.MAX_POINTS; j++) {
+          assertEquals("p_at_"+j+" should be perfect: "+s.getPrecisionAt(j), 1.0, s.getPrecisionAt(j), 1E-9);
+        }
+        break;
+      
+      case 1:
+        assertTrue("avg-p should be hurt", 1.0 > s.getAvp());
+        assertEquals("recall should be perfect: "+s.getRecall(), 1.0, s.getRecall(), 1E-9);
+        for (int j = 1; j <= QualityStats.MAX_POINTS; j++) {
+          assertTrue("p_at_"+j+" should be hurt: "+s.getPrecisionAt(j), 1.0 > s.getPrecisionAt(j));
+        }
+        break;
+
+      case 2:
+        assertTrue("avg-p should be hurt: "+s.getAvp(), 1.0 > s.getAvp());
+        assertTrue("recall should be hurt: "+s.getRecall(), 1.0 > s.getRecall());
+        for (int j = 1; j <= QualityStats.MAX_POINTS; j++) {
+          assertTrue("p_at_"+j+" should be hurt: "+s.getPrecisionAt(j), 1.0 > s.getPrecisionAt(j));
+        }
+        break;
+
+      default: {
+        assertEquals("avg-p should be perfect: "+s.getAvp(), 1.0, s.getAvp(), 1E-9);
+        assertEquals("recall should be perfect: "+s.getRecall(), 1.0, s.getRecall(), 1E-9);
+        for (int j = 1; j <= QualityStats.MAX_POINTS; j++) {
+          assertEquals("p_at_"+j+" should be perfect: "+s.getPrecisionAt(j), 1.0, s.getPrecisionAt(j), 1E-9);
+        }
+      }
+      
+      }
+    }
+    
+    // some queries were hurt, so the averages over all queries must be hurt as well
+    QualityStats avg = QualityStats.average(stats);
+    if (logger!=null) {
+      avg.log("Average statistis:",1,logger,"  ");
+    }
+    
+    assertTrue("mean avg-p should be hurt: "+avg.getAvp(), 1.0 > avg.getAvp());
+    assertTrue("avg recall should be hurt: "+avg.getRecall(), 1.0 > avg.getRecall());
+    for (int j = 1; j <= QualityStats.MAX_POINTS; j++) {
+      assertTrue("avg p_at_"+j+" should be hurt: "+avg.getPrecisionAt(j), 1.0 > avg.getPrecisionAt(j));
+    }
+
+    
+  }
+
+  // use benchmark logic to create the full Reuters index
+  private void createReutersIndex() throws Exception {
+    // 1. alg definition
+    String algLines[] = {
+        "# ----- properties ",
+        "doc.maker="+ReutersDocMaker.class.getName(),
+        "doc.add.log.step=2500",
+        "doc.term.vector=false",
+        "doc.maker.forever=false",
+        "directory=FSDirectory",
+        "doc.stored=true",
+        "doc.tokenized=true",
+        "# ----- alg ",
+        "ResetSystemErase",
+        "CreateIndex",
+        "{ AddDoc } : *",
+        "CloseIndex",
+    };
+    
+    // 2. execute the algorithm  (required in every "logic" test)
+    TestPerfTasksLogic.execBenchmark(algLines);
+  }
+}
--- a/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/trecQRels.txt
+++ b/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/trecQRels.txt
@ -0,0 +1,723 @@
+# -----------------------------------------------------------------------
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# -----------------------------------------------------------------------
+
+# ------------------------------------------------------------
+# Format:
+#
+#       qnum   0   doc-name     is-relevant
+#
+#
+# The original version of this file was created using
+# utils.QualityQueriesFinder, so all queries
+# would have a perfect 1.0 for all measures.
+#
+# To make it suitable for testing it was modified
+# for some queries, depending on m = qnum % 8
+# m==0: avg_precision and recall are hurt, by marking fake docs as relevant
+# m==1: precision_at_n and avg_precision are hurt, by unmarking relevant docs
+# m==2: all of avg_precision, precision_at_n and recall are hurt.
+# m>=3: these queries remain perfect
+# ------------------------------------------------------------
+
+# --- m==0: avg_precision and recall are hurt, by marking fake docs as relevant
+
+0 	 0 	 fakedoc1             	 1
+0 	 0 	 fakedoc2             	 1
+0 	 0 	 fakedoc3             	 1
+0 	 0 	 fakedoc4             	 1
+
+0 	 0 	 doc18211             	 1
+0 	 0 	 doc20192             	 1
+0 	 0 	 doc7401              	 1
+0 	 0 	 doc11285             	 1
+0 	 0 	 doc20647             	 1
+0 	 0 	 doc3057              	 1
+0 	 0 	 doc12431             	 1
+0 	 0 	 doc4989              	 1
+0 	 0 	 doc17324             	 1
+0 	 0 	 doc4030              	 1
+0 	 0 	 doc4290              	 1
+0 	 0 	 doc3462              	 1
+0 	 0 	 doc15313             	 1
+0 	 0 	 doc10303             	 1
+0 	 0 	 doc1893              	 1
+0 	 0 	 doc5008              	 1
+0 	 0 	 doc14634             	 1
+0 	 0 	 doc5471              	 1
+0 	 0 	 doc17904             	 1
+0 	 0 	 doc7168              	 1
+0 	 0 	 doc21275             	 1
+0 	 0 	 doc9011              	 1
+0 	 0 	 doc17546             	 1
+0 	 0 	 doc9102              	 1
+0 	 0 	 doc13199             	 1
+
+# --- m==1: precision_at_n and avg_precision are hurt, by unmarking relevant docs
+
+1 	 0 	 doc9857              	 0
+1 	 0 	 doc16846             	 1
+1 	 0 	 doc4320              	 1
+1 	 0 	 doc9501              	 0
+1 	 0 	 doc10159             	 1
+1 	 0 	 doc16642             	 1
+1 	 0 	 doc17536             	 0
+1 	 0 	 doc17571             	 1
+1 	 0 	 doc18728             	 1
+1 	 0 	 doc18828             	 1
+1 	 0 	 doc19108             	 0
+1 	 0 	 doc9940              	 1
+1 	 0 	 doc11852             	 1
+1 	 0 	 doc7430              	 0
+1 	 0 	 doc19162             	 1
+1 	 0 	 doc1743              	 1
+1 	 0 	 doc2137              	 1
+1 	 0 	 doc7611              	 1
+1 	 0 	 doc8072              	 1
+1 	 0 	 doc12764             	 1
+1 	 0 	 doc2593              	 1
+1 	 0 	 doc11088             	 1
+1 	 0 	 doc931               	 1
+1 	 0 	 doc7673              	 1
+1 	 0 	 doc12941             	 1
+1 	 0 	 doc11797             	 1
+1 	 0 	 doc11831             	 1
+1 	 0 	 doc13162             	 1
+1 	 0 	 doc4423              	 1
+1 	 0 	 doc5217              	 1
+
+# ---- m==2: all of avg_precision, precision_at_n and recall are hurt.
+
+2 	 0 	 fakedoc1             	 1
+2 	 0 	 fakedoc2             	 1
+2 	 0 	 fakedoc3             	 1
+2 	 0 	 fakedoc4             	 1
+
+2 	 0 	 doc3137              	 0
+2 	 0 	 doc7142              	 0
+2 	 0 	 doc13667             	 0
+2 	 0 	 doc13171             	 0
+2 	 0 	 doc13372             	 1
+2 	 0 	 doc21415             	 1
+2 	 0 	 doc16298             	 1
+2 	 0 	 doc14957             	 1
+2 	 0 	 doc153               	 1
+2 	 0 	 doc16092             	 1
+2 	 0 	 doc16096             	 1
+2 	 0 	 doc21303             	 1
+2 	 0 	 doc18681             	 1
+2 	 0 	 doc20756             	 1
+2 	 0 	 doc355               	 1
+2 	 0 	 doc13395             	 1
+2 	 0 	 doc5009              	 1
+2 	 0 	 doc17164             	 1
+2 	 0 	 doc13162             	 1
+2 	 0 	 doc11757             	 1
+2 	 0 	 doc9637              	 1
+2 	 0 	 doc18087             	 1
+2 	 0 	 doc4593              	 1
+2 	 0 	 doc4677              	 1
+2 	 0 	 doc20865             	 1
+2 	 0 	 doc8556              	 1
+2 	 0 	 doc2578              	 1
+2 	 0 	 doc1163              	 1
+2 	 0 	 doc3797              	 1
+2 	 0 	 doc11094             	 1
+
+
+3 	 0 	 doc19578             	 1
+3 	 0 	 doc14860             	 1
+3 	 0 	 doc7235              	 1
+3 	 0 	 doc20590             	 1
+3 	 0 	 doc17933             	 1
+3 	 0 	 doc9384              	 1
+3 	 0 	 doc10783             	 1
+3 	 0 	 doc1963              	 1
+3 	 0 	 doc18356             	 1
+3 	 0 	 doc13254             	 1
+3 	 0 	 doc18402             	 1
+3 	 0 	 doc15241             	 1
+3 	 0 	 doc3303              	 1
+3 	 0 	 doc8868              	 1
+3 	 0 	 doc18520             	 1
+3 	 0 	 doc4650              	 1
+3 	 0 	 doc4727              	 1
+3 	 0 	 doc21518             	 1
+3 	 0 	 doc5060              	 1
+3 	 0 	 doc7587              	 1
+3 	 0 	 doc2990              	 1
+3 	 0 	 doc8042              	 1
+3 	 0 	 doc6304              	 1
+3 	 0 	 doc13223             	 1
+3 	 0 	 doc1964              	 1
+3 	 0 	 doc10597             	 1
+3 	 0 	 doc21023             	 1
+3 	 0 	 doc19057             	 1
+3 	 0 	 doc14948             	 1
+3 	 0 	 doc9692              	 1
+
+
+4 	 0 	 doc2534              	 1
+4 	 0 	 doc21388             	 1
+4 	 0 	 doc20923             	 1
+4 	 0 	 doc11547             	 1
+4 	 0 	 doc19755             	 1
+4 	 0 	 doc3793              	 1
+4 	 0 	 doc6714              	 1
+4 	 0 	 doc12722             	 1
+4 	 0 	 doc5552              	 1
+4 	 0 	 doc6810              	 1
+4 	 0 	 doc16953             	 1
+4 	 0 	 doc2527              	 1
+4 	 0 	 doc5361              	 1
+4 	 0 	 doc12353             	 1
+4 	 0 	 doc7308              	 1
+4 	 0 	 doc3836              	 1
+4 	 0 	 doc2293              	 1
+4 	 0 	 doc7348              	 1
+4 	 0 	 doc17119             	 1
+4 	 0 	 doc19331             	 1
+4 	 0 	 doc3411              	 1
+4 	 0 	 doc14643             	 1
+4 	 0 	 doc9058              	 1
+4 	 0 	 doc11099             	 1
+4 	 0 	 doc12485             	 1
+4 	 0 	 doc16432             	 1
+4 	 0 	 doc10047             	 1
+4 	 0 	 doc13788             	 1
+4 	 0 	 doc117               	 1
+4 	 0 	 doc638               	 1
+
+
+
+5 	 0 	 doc169               	 1
+5 	 0 	 doc13181             	 1
+5 	 0 	 doc4350              	 1
+5 	 0 	 doc10242             	 1
+5 	 0 	 doc955               	 1
+5 	 0 	 doc5389              	 1
+5 	 0 	 doc17122             	 1
+5 	 0 	 doc17417             	 1
+5 	 0 	 doc12199             	 1
+5 	 0 	 doc6918              	 1
+5 	 0 	 doc3857              	 1
+5 	 0 	 doc2981              	 1
+5 	 0 	 doc10639             	 1
+5 	 0 	 doc10478             	 1
+5 	 0 	 doc8573              	 1
+5 	 0 	 doc9197              	 1
+5 	 0 	 doc9298              	 1
+5 	 0 	 doc2492              	 1
+5 	 0 	 doc10262             	 1
+5 	 0 	 doc5180              	 1
+5 	 0 	 doc11758             	 1
+5 	 0 	 doc4065              	 1
+5 	 0 	 doc9124              	 1
+5 	 0 	 doc11528             	 1
+5 	 0 	 doc18879             	 1
+5 	 0 	 doc17864             	 1
+5 	 0 	 doc3204              	 1
+5 	 0 	 doc12157             	 1
+5 	 0 	 doc4496              	 1
+5 	 0 	 doc20190             	 1
+
+
+
+6 	 0 	 doc9507              	 1
+6 	 0 	 doc15630             	 1
+6 	 0 	 doc8469              	 1
+6 	 0 	 doc11918             	 1
+6 	 0 	 doc20482             	 1
+6 	 0 	 doc20158             	 1
+6 	 0 	 doc19831             	 1
+6 	 0 	 doc8296              	 1
+6 	 0 	 doc8930              	 1
+6 	 0 	 doc16460             	 1
+6 	 0 	 doc2577              	 1
+6 	 0 	 doc15476             	 1
+6 	 0 	 doc1767              	 1
+6 	 0 	 doc689               	 1
+6 	 0 	 doc16606             	 1
+6 	 0 	 doc6149              	 1
+6 	 0 	 doc18691             	 1
+6 	 0 	 doc2208              	 1
+6 	 0 	 doc3592              	 1
+6 	 0 	 doc11199             	 1
+6 	 0 	 doc16329             	 1
+6 	 0 	 doc6007              	 1
+6 	 0 	 doc15231             	 1
+6 	 0 	 doc20622             	 1
+6 	 0 	 doc21468             	 1
+6 	 0 	 doc12230             	 1
+6 	 0 	 doc5723              	 1
+6 	 0 	 doc8120              	 1
+6 	 0 	 doc8668              	 1
+6 	 0 	 doc303               	 1
+
+
+
+
+7 	 0 	 doc7728              	 1
+7 	 0 	 doc7693              	 1
+7 	 0 	 doc21088             	 1
+7 	 0 	 doc5017              	 1
+7 	 0 	 doc10807             	 1
+7 	 0 	 doc16204             	 1
+7 	 0 	 doc2233              	 1
+7 	 0 	 doc3632              	 1
+7 	 0 	 doc4719              	 1
+7 	 0 	 doc6477              	 1
+7 	 0 	 doc6502              	 1
+7 	 0 	 doc6709              	 1
+7 	 0 	 doc7710              	 1
+7 	 0 	 doc9193              	 1
+7 	 0 	 doc9309              	 1
+7 	 0 	 doc9789              	 1
+7 	 0 	 doc10971             	 1
+7 	 0 	 doc18059             	 1
+7 	 0 	 doc19906             	 1
+7 	 0 	 doc20089             	 1
+7 	 0 	 doc20102             	 1
+7 	 0 	 doc21040             	 1
+7 	 0 	 doc21153             	 1
+7 	 0 	 doc9147              	 1
+7 	 0 	 doc9930              	 1
+7 	 0 	 doc19763             	 1
+7 	 0 	 doc1559              	 1
+7 	 0 	 doc21248             	 1
+7 	 0 	 doc17945             	 1
+7 	 0 	 doc526               	 1
+
+
+# --- m==0: avg_precision and recall are hurt, by marking fake docs as relevant
+
+8 	 0 	 fakedoc1             	 1
+8 	 0 	 fakedoc2             	 1
+8 	 0 	 fakedoc3             	 1
+8 	 0 	 fakedoc4             	 1
+
+8 	 0 	 doc16299             	 1
+8 	 0 	 doc1662              	 1
+8 	 0 	 doc4585              	 1
+8 	 0 	 doc12315             	 1
+8 	 0 	 doc16266             	 1
+8 	 0 	 doc13136             	 1
+8 	 0 	 doc19212             	 1
+8 	 0 	 doc7086              	 1
+8 	 0 	 doc7062              	 1
+8 	 0 	 doc6134              	 1
+8 	 0 	 doc13953             	 1
+8 	 0 	 doc16264             	 1
+8 	 0 	 doc2494              	 1
+8 	 0 	 doc10636             	 1
+8 	 0 	 doc10894             	 1
+8 	 0 	 doc6844              	 1
+8 	 0 	 doc674               	 1
+8 	 0 	 doc13520             	 1
+8 	 0 	 doc344               	 1
+8 	 0 	 doc2896              	 1
+8 	 0 	 doc11871             	 1
+8 	 0 	 doc1862              	 1
+8 	 0 	 doc16728             	 1
+8 	 0 	 doc10308             	 1
+8 	 0 	 doc2227              	 1
+8 	 0 	 doc13167             	 1
+8 	 0 	 doc20607             	 1
+8 	 0 	 doc9670              	 1
+8 	 0 	 doc1566              	 1
+8 	 0 	 doc17885             	 1
+
+
+# ---- m==1: precision_at_n and avg_precision are hurt, by unmarking relevant docs
+
+
+9 	 0 	 doc1990              	 0
+9 	 0 	 doc9342              	 1
+9 	 0 	 doc19427             	 1
+9 	 0 	 doc12432             	 0
+9 	 0 	 doc13480             	 1
+9 	 0 	 doc3322              	 1
+9 	 0 	 doc16044             	 1
+9 	 0 	 doc266               	 0
+9 	 0 	 doc3437              	 1
+9 	 0 	 doc5370              	 1
+9 	 0 	 doc10314             	 1
+9 	 0 	 doc4892              	 1
+9 	 0 	 doc5763              	 0
+9 	 0 	 doc14045             	 1
+9 	 0 	 doc1090              	 1
+9 	 0 	 doc7437              	 1
+9 	 0 	 doc5822              	 1
+9 	 0 	 doc4285              	 1
+9 	 0 	 doc17119             	 1
+9 	 0 	 doc21001             	 1
+9 	 0 	 doc4337              	 1
+9 	 0 	 doc5967              	 1
+9 	 0 	 doc10214             	 1
+9 	 0 	 doc12001             	 1
+9 	 0 	 doc18553             	 1
+9 	 0 	 doc12116             	 1
+9 	 0 	 doc5064              	 1
+9 	 0 	 doc5018              	 1
+9 	 0 	 doc5037              	 1
+9 	 0 	 doc8025              	 1
+
+
+# ---- m==2: all of avg_precision, precision_at_n and recall are hurt.
+
+10 	 0 	 fakedoc1             	 1
+10 	 0 	 fakedoc2             	 1
+10 	 0 	 fakedoc3             	 1
+10 	 0 	 fakedoc4             	 1
+
+10 	 0 	 doc17218             	 0
+10 	 0 	 doc10270             	 0
+10 	 0 	 doc5958              	 0
+10 	 0 	 doc19943             	 0
+10 	 0 	 doc6510              	 1
+10 	 0 	 doc16087             	 1
+10 	 0 	 doc14893             	 1
+10 	 0 	 doc8933              	 1
+10 	 0 	 doc4354              	 1
+10 	 0 	 doc16729             	 1
+10 	 0 	 doc16761             	 1
+10 	 0 	 doc6964              	 1
+10 	 0 	 doc16743             	 1
+10 	 0 	 doc7357              	 1
+10 	 0 	 doc2534              	 1
+10 	 0 	 doc18321             	 1
+10 	 0 	 doc18497             	 1
+10 	 0 	 doc11214             	 1
+10 	 0 	 doc11819             	 1
+10 	 0 	 doc10818             	 1
+10 	 0 	 doc15769             	 1
+10 	 0 	 doc5348              	 1
+10 	 0 	 doc14948             	 1
+10 	 0 	 doc7891              	 1
+10 	 0 	 doc9897              	 1
+10 	 0 	 doc15559             	 1
+10 	 0 	 doc14935             	 1
+10 	 0 	 doc14954             	 1
+10 	 0 	 doc6621              	 1
+10 	 0 	 doc6930              	 1
+
+
+11 	 0 	 doc11943             	 1
+11 	 0 	 doc286               	 1
+11 	 0 	 doc1574              	 1
+11 	 0 	 doc17916             	 1
+11 	 0 	 doc17918             	 1
+11 	 0 	 doc19213             	 1
+11 	 0 	 doc9337              	 1
+11 	 0 	 doc8593              	 1
+11 	 0 	 doc8800              	 1
+11 	 0 	 doc18580             	 1
+11 	 0 	 doc209               	 1
+11 	 0 	 doc1893              	 1
+11 	 0 	 doc11189             	 1
+11 	 0 	 doc17702             	 1
+11 	 0 	 doc10180             	 1
+11 	 0 	 doc11869             	 1
+11 	 0 	 doc9705              	 1
+11 	 0 	 doc8715              	 1
+11 	 0 	 doc12753             	 1
+11 	 0 	 doc10195             	 1
+11 	 0 	 doc3552              	 1
+11 	 0 	 doc16030             	 1
+11 	 0 	 doc4623              	 1
+11 	 0 	 doc3188              	 1
+11 	 0 	 doc8735              	 1
+11 	 0 	 doc151               	 1
+11 	 0 	 doc5792              	 1
+11 	 0 	 doc5194              	 1
+11 	 0 	 doc3393              	 1
+11 	 0 	 doc19027             	 1
+
+
+
+12 	 0 	 doc18198             	 1
+12 	 0 	 doc2444              	 1
+12 	 0 	 doc4305              	 1
+12 	 0 	 doc6544              	 1
+12 	 0 	 doc11639             	 1
+12 	 0 	 doc10640             	 1
+12 	 0 	 doc12192             	 1
+12 	 0 	 doc128               	 1
+12 	 0 	 doc10760             	 1
+12 	 0 	 doc10881             	 1
+12 	 0 	 doc2698              	 1
+12 	 0 	 doc3552              	 1
+12 	 0 	 doc20524             	 1
+12 	 0 	 doc1884              	 1
+12 	 0 	 doc9187              	 1
+12 	 0 	 doc3131              	 1
+12 	 0 	 doc2911              	 1
+12 	 0 	 doc2589              	 1
+12 	 0 	 doc3747              	 1
+12 	 0 	 doc3813              	 1
+12 	 0 	 doc5222              	 1
+12 	 0 	 doc6023              	 1
+12 	 0 	 doc6624              	 1
+12 	 0 	 doc7655              	 1
+12 	 0 	 doc9205              	 1
+12 	 0 	 doc12062             	 1
+12 	 0 	 doc15504             	 1
+12 	 0 	 doc13625             	 1
+12 	 0 	 doc18704             	 1
+12 	 0 	 doc2277              	 1
+
+
+
+13 	 0 	 doc4948              	 1
+13 	 0 	 doc21565             	 1
+13 	 0 	 doc17135             	 1
+13 	 0 	 doc1866              	 1
+13 	 0 	 doc13989             	 1
+13 	 0 	 doc5605              	 1
+13 	 0 	 doc13431             	 1
+13 	 0 	 doc2100              	 1
+13 	 0 	 doc16347             	 1
+13 	 0 	 doc16894             	 1
+13 	 0 	 doc6764              	 1
+13 	 0 	 doc8554              	 1
+13 	 0 	 doc8695              	 1
+13 	 0 	 doc8977              	 1
+13 	 0 	 doc19478             	 1
+13 	 0 	 doc14595             	 1
+13 	 0 	 doc2408              	 1
+13 	 0 	 doc2592              	 1
+13 	 0 	 doc10947             	 1
+13 	 0 	 doc15794             	 1
+13 	 0 	 doc5236              	 1
+13 	 0 	 doc14847             	 1
+13 	 0 	 doc3980              	 1
+13 	 0 	 doc1844              	 1
+13 	 0 	 doc42                	 1
+13 	 0 	 doc7783              	 1
+13 	 0 	 doc4557              	 1
+13 	 0 	 doc16423             	 1
+13 	 0 	 doc17170             	 1
+13 	 0 	 doc5822              	 1
+
+
+
+14 	 0 	 doc17172             	 1
+14 	 0 	 doc17210             	 1
+14 	 0 	 doc5044              	 1
+14 	 0 	 doc4627              	 1
+14 	 0 	 doc4683              	 1
+14 	 0 	 doc15126             	 1
+14 	 0 	 doc4538              	 1
+14 	 0 	 doc273               	 1
+14 	 0 	 doc19585             	 1
+14 	 0 	 doc16078             	 1
+14 	 0 	 doc4529              	 1
+14 	 0 	 doc4186              	 1
+14 	 0 	 doc12961             	 1
+14 	 0 	 doc19217             	 1
+14 	 0 	 doc5670              	 1
+14 	 0 	 doc1699              	 1
+14 	 0 	 doc4716              	 1
+14 	 0 	 doc12644             	 1
+14 	 0 	 doc18387             	 1
+14 	 0 	 doc336               	 1
+14 	 0 	 doc16130             	 1
+14 	 0 	 doc18718             	 1
+14 	 0 	 doc12527             	 1
+14 	 0 	 doc11797             	 1
+14 	 0 	 doc11831             	 1
+14 	 0 	 doc7538              	 1
+14 	 0 	 doc17259             	 1
+14 	 0 	 doc18724             	 1
+14 	 0 	 doc19330             	 1
+14 	 0 	 doc19206             	 1
+
+
+
+15 	 0 	 doc12198             	 1
+15 	 0 	 doc20371             	 1
+15 	 0 	 doc2947              	 1
+15 	 0 	 doc10750             	 1
+15 	 0 	 doc7239              	 1
+15 	 0 	 doc14189             	 1
+15 	 0 	 doc19474             	 1
+15 	 0 	 doc14776             	 1
+15 	 0 	 doc21270             	 1
+15 	 0 	 doc6387              	 1
+15 	 0 	 doc12908             	 1
+15 	 0 	 doc9573              	 1
+15 	 0 	 doc17102             	 1
+15 	 0 	 doc21482             	 1
+15 	 0 	 doc6524              	 1
+15 	 0 	 doc18034             	 1
+15 	 0 	 doc1358              	 1
+15 	 0 	 doc13147             	 1
+15 	 0 	 doc17731             	 1
+15 	 0 	 doc12890             	 1
+15 	 0 	 doc20887             	 1
+15 	 0 	 doc19508             	 1
+15 	 0 	 doc18498             	 1
+15 	 0 	 doc20642             	 1
+15 	 0 	 doc19878             	 1
+15 	 0 	 doc6556              	 1
+15 	 0 	 doc10272             	 1
+15 	 0 	 doc5720              	 1
+15 	 0 	 doc17578             	 1
+15 	 0 	 doc17164             	 1
+
+
+# --- m==0: avg_precision and recall are hurt, by marking fake docs as relevant
+
+16 	 0 	 fakedoc1             	 1
+16 	 0 	 fakedoc2             	 1
+16 	 0 	 fakedoc3             	 1
+16 	 0 	 fakedoc4             	 1
+
+16 	 0 	 doc4043              	 1
+16 	 0 	 doc14985             	 1
+16 	 0 	 doc15370             	 1
+16 	 0 	 doc15426             	 1
+16 	 0 	 doc1702              	 1
+16 	 0 	 doc3062              	 1
+16 	 0 	 doc16134             	 1
+16 	 0 	 doc15037             	 1
+16 	 0 	 doc8224              	 1
+16 	 0 	 doc5044              	 1
+16 	 0 	 doc8545              	 1
+16 	 0 	 doc7228              	 1
+16 	 0 	 doc12686             	 1
+16 	 0 	 doc16609             	 1
+16 	 0 	 doc13161             	 1
+16 	 0 	 doc3446              	 1
+16 	 0 	 doc16493             	 1
+16 	 0 	 doc19297             	 1
+16 	 0 	 doc13619             	 1
+16 	 0 	 doc3281              	 1
+16 	 0 	 doc15499             	 1
+16 	 0 	 doc7373              	 1
+16 	 0 	 doc9064              	 1
+16 	 0 	 doc1710              	 1
+16 	 0 	 doc15411             	 1
+16 	 0 	 doc10890             	 1
+16 	 0 	 doc3166              	 1
+16 	 0 	 doc17894             	 1
+16 	 0 	 doc4560              	 1
+16 	 0 	 doc12766             	 1
+
+
+# --- m==1: precision_at_n and avg_precision are hurt, by unmarking relevant docs
+
+17 	 0 	 doc3117              	 0
+17 	 0 	 doc7477              	 0
+17 	 0 	 doc7569              	 0
+17 	 0 	 doc20667             	 0
+17 	 0 	 doc20260             	 1
+17 	 0 	 doc17355             	 1
+17 	 0 	 doc11021             	 1
+17 	 0 	 doc20934             	 1
+17 	 0 	 doc552               	 1
+17 	 0 	 doc20856             	 1
+17 	 0 	 doc3524              	 1
+17 	 0 	 doc17343             	 1
+17 	 0 	 doc21055             	 1
+17 	 0 	 doc19032             	 1
+17 	 0 	 doc19786             	 1
+17 	 0 	 doc9281              	 1
+17 	 0 	 doc1695              	 1
+17 	 0 	 doc15940             	 1
+17 	 0 	 doc9215              	 1
+17 	 0 	 doc8335              	 1
+17 	 0 	 doc20936             	 1
+17 	 0 	 doc6914              	 1
+17 	 0 	 doc12122             	 1
+17 	 0 	 doc6618              	 1
+17 	 0 	 doc5049              	 1
+17 	 0 	 doc450               	 1
+17 	 0 	 doc19206             	 1
+17 	 0 	 doc18823             	 1
+17 	 0 	 doc5307              	 1
+17 	 0 	 doc17295             	 1
+
+
+# ---- m==2: all of avg_precision, precision_at_n and recall are hurt.
+
+18 	 0 	 fakedoc1             	 1
+18 	 0 	 fakedoc2             	 1
+18 	 0 	 fakedoc3             	 1
+18 	 0 	 fakedoc4             	 1
+
+18 	 0 	 doc8064              	 0
+18 	 0 	 doc18142             	 0
+18 	 0 	 doc19383             	 0
+18 	 0 	 doc21151             	 0
+18 	 0 	 doc4665              	 1
+18 	 0 	 doc2897              	 1
+18 	 0 	 doc6878              	 1
+18 	 0 	 doc14507             	 1
+18 	 0 	 doc2976              	 1
+18 	 0 	 doc11757             	 1
+18 	 0 	 doc12625             	 1
+18 	 0 	 doc14908             	 1
+18 	 0 	 doc12790             	 1
+18 	 0 	 doc17915             	 1
+18 	 0 	 doc11804             	 1
+18 	 0 	 doc12935             	 1
+18 	 0 	 doc8225              	 1
+18 	 0 	 doc18011             	 1
+18 	 0 	 doc10493             	 1
+18 	 0 	 doc17922             	 1
+18 	 0 	 doc1902              	 1
+18 	 0 	 doc14049             	 1
+18 	 0 	 doc1334              	 1
+18 	 0 	 doc1168              	 1
+18 	 0 	 doc4859              	 1
+18 	 0 	 doc7124              	 1
+18 	 0 	 doc9692              	 1
+18 	 0 	 doc18402             	 1
+18 	 0 	 doc9089              	 1
+18 	 0 	 doc15375             	 1
+
+
+19 	 0 	 doc5267              	 1
+19 	 0 	 doc2310              	 1
+19 	 0 	 doc11435             	 1
+19 	 0 	 doc15666             	 1
+19 	 0 	 doc12733             	 1
+19 	 0 	 doc7925              	 1
+19 	 0 	 doc2444              	 1
+19 	 0 	 doc4900              	 1
+19 	 0 	 doc10803             	 1
+19 	 0 	 doc8869              	 1
+19 	 0 	 doc5051              	 1
+19 	 0 	 doc9163              	 1
+19 	 0 	 doc529               	 1
+19 	 0 	 doc19546             	 1
+19 	 0 	 doc18561             	 1
+19 	 0 	 doc10634             	 1
+19 	 0 	 doc3979              	 1
+19 	 0 	 doc8833              	 1
+19 	 0 	 doc7652              	 1
+19 	 0 	 doc4804              	 1
+19 	 0 	 doc12616             	 1
+19 	 0 	 doc8419              	 1
+19 	 0 	 doc9431              	 1
+19 	 0 	 doc16235             	 1
+19 	 0 	 doc732               	 1
+19 	 0 	 doc2515              	 1
+19 	 0 	 doc7194              	 1
+19 	 0 	 doc16301             	 1
+19 	 0 	 doc4494              	 1
+19 	 0 	 doc4496              	 1
--- a/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/trecTopics.txt
+++ b/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/trecTopics.txt
@ -0,0 +1,281 @@
+# -----------------------------------------------------------------------
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# -----------------------------------------------------------------------
+
+# ------------------------------------------------------------
+# This file was created using utils.QualityQueriesFinder.
+# See also trecQRels.txt.
+# ------------------------------------------------------------
+
+<top>
+<num> Number: 0
+
+<title> statement months  total 1987
+
+<desc> Description:
+
+
+<narr> Narrative:
+
+
+</top>
+
+<top>
+<num> Number: 1
+
+<title> agreed 15  against five
+
+<desc> Description:
+
+
+<narr> Narrative:
+
+
+</top>
+
+<top>
+<num> Number: 2
+
+<title> nine only  month international
+
+<desc> Description:
+
+
+<narr> Narrative:
+
+
+</top>
+
+<top>
+<num> Number: 3
+
+<title> finance any  10 government
+
+<desc> Description:
+
+
+<narr> Narrative:
+
+
+</top>
+
+<top>
+<num> Number: 4
+
+<title> issue next  years all
+
+<desc> Description:
+
+
+<narr> Narrative:
+
+
+</top>
+
+<top>
+<num> Number: 5
+
+<title> who major  ltd today
+
+<desc> Description:
+
+
+<narr> Narrative:
+
+
+</top>
+
+<top>
+<num> Number: 6
+
+<title> business revs  securities per
+
+<desc> Description:
+
+
+<narr> Narrative:
+
+
+</top>
+
+<top>
+<num> Number: 7
+
+<title> quarter time  note sales
+
+<desc> Description:
+
+
+<narr> Narrative:
+
+
+</top>
+
+<top>
+<num> Number: 8
+
+<title> february earlier  loss group
+
+<desc> Description:
+
+
+<narr> Narrative:
+
+
+</top>
+
+<top>
+<num> Number: 9
+
+<title> out end  made some
+
+<desc> Description:
+
+
+<narr> Narrative:
+
+
+</top>
+
+<top>
+<num> Number: 10
+
+<title> spokesman financial  30 expected
+
+<desc> Description:
+
+
+<narr> Narrative:
+
+
+</top>
+
+<top>
+<num> Number: 11
+
+<title> 1985 now  prices due
+
+<desc> Description:
+
+
+<narr> Narrative:
+
+
+</top>
+
+<top>
+<num> Number: 12
+
+<title> before board  record could
+
+<desc> Description:
+
+
+<narr> Narrative:
+
+
+</top>
+
+<top>
+<num> Number: 13
+
+<title> pay debt  because trade
+
+<desc> Description:
+
+
+<narr> Narrative:
+
+
+</top>
+
+<top>
+<num> Number: 14
+
+<title> meeting increase  four price
+
+<desc> Description:
+
+
+<narr> Narrative:
+
+
+</top>
+
+<top>
+<num> Number: 15
+
+<title> chairman rate  six interest
+
+<desc> Description:
+
+
+<narr> Narrative:
+
+
+</top>
+
+<top>
+<num> Number: 16
+
+<title> since current  between agreement
+
+<desc> Description:
+
+
+<narr> Narrative:
+
+
+</top>
+
+<top>
+<num> Number: 17
+
+<title> oil we  when president
+
+<desc> Description:
+
+
+<narr> Narrative:
+
+
+</top>
+
+<top>
+<num> Number: 18
+
+<title> capital through  foreign added
+
+<desc> Description:
+
+
+<narr> Narrative:
+
+
+</top>
+
+<top>
+<num> Number: 19
+
+<title> 20 while  common week
+
+<desc> Description:
+
+
+<narr> Narrative:
+
+
+</top>