LUCENE-2254: add support to quality pkg for any combination of (title,description,narrative)

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@912308 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2010-02-21 07:00:08 +00:00
parent 3abdb99087
commit 349ca0943e
3 changed files with 38 additions and 8 deletions

View File

@ -2,6 +2,11 @@ Lucene Benchmark Contrib Change Log
The Benchmark contrib package contains code for benchmarking Lucene in a variety of ways.
2/21/2010
LUCENE-2254: Add support to the quality package for running
experiments with any combination of Title, Description, and Narrative.
(Robert Muir)
1/28/2010
LUCENE-2223: Add a benchmark for ShingleFilter. You can wrap any
analyzer with ShingleAnalyzerWrapper and specify shingle parameters

View File

@ -30,6 +30,8 @@ import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.PrintWriter;
import java.util.HashSet;
import java.util.Set;
/**
@ -38,12 +40,14 @@ import java.io.PrintWriter;
**/
public class QueryDriver {
public static void main(String[] args) throws Exception {
if (args.length != 4) {
System.err.println("Usage: QueryDriver <topicsFile> <qrelsFile> <submissionFile> <indexDir>");
if (args.length < 4 || args.length > 5) {
System.err.println("Usage: QueryDriver <topicsFile> <qrelsFile> <submissionFile> <indexDir> [querySpec]");
System.err.println("topicsFile: input file containing queries");
System.err.println("qrelsFile: input file containing relevance judgements");
System.err.println("submissionFile: output submission file for trec_eval");
System.err.println("indexDir: index directory");
System.err.println("querySpec: string composed of fields to use in query consisting of T=title,D=description,N=narrative:");
System.err.println("\texample: TD (query on Title + Description). The default is T (title only)");
System.exit(1);
}
@ -51,6 +55,7 @@ public class QueryDriver {
File qrelsFile = new File(args[1]);
SubmissionReport submitLog = new SubmissionReport(new PrintWriter(args[2]), "lucene");
FSDirectory dir = FSDirectory.open(new File(args[3]));
String fieldSpec = args.length == 5 ? args[4] : "T"; // default to Title-only if not specified.
Searcher searcher = new IndexSearcher(dir, true);
int maxResults = 1000;
@ -68,8 +73,13 @@ public class QueryDriver {
// validate topics & judgments match each other
judge.validateData(qqs, logger);
Set<String> fieldSet = new HashSet<String>();
if (fieldSpec.indexOf('T') >= 0) fieldSet.add("title");
if (fieldSpec.indexOf('D') >= 0) fieldSet.add("description");
if (fieldSpec.indexOf('N') >= 0) fieldSet.add("narrative");
// set the parsing of quality queries into Lucene queries.
QualityQueryParser qqParser = new SimpleQQParser("title", "body");
QualityQueryParser qqParser = new SimpleQQParser(fieldSet.toArray(new String[0]), "body");
// run the benchmark
QualityBenchmark qrun = new QualityBenchmark(qqs, qqParser, searcher, docNameField);

View File

@ -21,27 +21,38 @@ import org.apache.lucene.benchmark.quality.QualityQuery;
import org.apache.lucene.benchmark.quality.QualityQueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.Version;
/**
* Simplistic quality query parser. A Lucene query is created by passing
* the value of the specified QualityQuery name-value pair into
* the value of the specified QualityQuery name-value pair(s) into
* Lucene's QueryParser using StandardAnalyzer. */
public class SimpleQQParser implements QualityQueryParser {
private String qqName;
private String qqNames[];
private String indexField;
ThreadLocal<QueryParser> queryParser = new ThreadLocal<QueryParser>();
/**
* Constructor of a simple qq parser.
* @param qqNames name-value pairs of quality query to use for creating the query
* @param indexField corresponding index field
*/
public SimpleQQParser(String qqNames[], String indexField) {
this.qqNames = qqNames;
this.indexField = indexField;
}
/**
* Constructor of a simple qq parser.
* @param qqName name-value pair of quality query to use for creating the query
* @param indexField corresponding index field
*/
public SimpleQQParser(String qqName, String indexField) {
this.qqName = qqName;
this.indexField = indexField;
this(new String[] { qqName }, indexField);
}
/* (non-Javadoc)
@ -53,7 +64,11 @@ public class SimpleQQParser implements QualityQueryParser {
qp = new QueryParser(Version.LUCENE_CURRENT, indexField, new StandardAnalyzer(Version.LUCENE_CURRENT));
queryParser.set(qp);
}
return qp.parse(qq.getValue(qqName));
BooleanQuery bq = new BooleanQuery();
for (int i = 0; i < qqNames.length; i++)
bq.add(qp.parse(QueryParser.escape(qq.getValue(qqNames[i]))), BooleanClause.Occur.SHOULD);
return bq;
}
}