mirror of https://github.com/apache/lucene.git
LUCENE-2254: add support to quality pkg for any combination of (title,description,narrative)
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@912308 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
3abdb99087
commit
349ca0943e
|
@ -2,6 +2,11 @@ Lucene Benchmark Contrib Change Log
|
|||
|
||||
The Benchmark contrib package contains code for benchmarking Lucene in a variety of ways.
|
||||
|
||||
2/21/2020
|
||||
LUCENE-2254: Add support to the quality package for running
|
||||
experiments with any combination of Title, Description, and Narrative.
|
||||
(Robert Muir)
|
||||
|
||||
1/28/2010
|
||||
LUCENE-2223: Add a benchmark for ShingleFilter. You can wrap any
|
||||
analyzer with ShingleAnalyzerWrapper and specify shingle parameters
|
||||
|
|
|
@ -30,6 +30,8 @@ import java.io.BufferedReader;
|
|||
import java.io.File;
|
||||
import java.io.FileReader;
|
||||
import java.io.PrintWriter;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
|
||||
/**
|
||||
|
@ -38,12 +40,14 @@ import java.io.PrintWriter;
|
|||
**/
|
||||
public class QueryDriver {
|
||||
public static void main(String[] args) throws Exception {
|
||||
if (args.length != 4) {
|
||||
System.err.println("Usage: QueryDriver <topicsFile> <qrelsFile> <submissionFile> <indexDir>");
|
||||
if (args.length < 4 || args.length > 5) {
|
||||
System.err.println("Usage: QueryDriver <topicsFile> <qrelsFile> <submissionFile> <indexDir> [querySpec]");
|
||||
System.err.println("topicsFile: input file containing queries");
|
||||
System.err.println("qrelsFile: input file containing relevance judgements");
|
||||
System.err.println("submissionFile: output submission file for trec_eval");
|
||||
System.err.println("indexDir: index directory");
|
||||
System.err.println("querySpec: string composed of fields to use in query consisting of T=title,D=description,N=narrative:");
|
||||
System.err.println("\texample: TD (query on Title + Description). The default is T (title only)");
|
||||
System.exit(1);
|
||||
}
|
||||
|
||||
|
@ -51,6 +55,7 @@ public class QueryDriver {
|
|||
File qrelsFile = new File(args[1]);
|
||||
SubmissionReport submitLog = new SubmissionReport(new PrintWriter(args[2]), "lucene");
|
||||
FSDirectory dir = FSDirectory.open(new File(args[3]));
|
||||
String fieldSpec = args.length == 5 ? args[4] : "T"; // default to Title-only if not specified.
|
||||
Searcher searcher = new IndexSearcher(dir, true);
|
||||
|
||||
int maxResults = 1000;
|
||||
|
@ -68,8 +73,13 @@ public class QueryDriver {
|
|||
// validate topics & judgments match each other
|
||||
judge.validateData(qqs, logger);
|
||||
|
||||
Set<String> fieldSet = new HashSet<String>();
|
||||
if (fieldSpec.indexOf('T') >= 0) fieldSet.add("title");
|
||||
if (fieldSpec.indexOf('D') >= 0) fieldSet.add("description");
|
||||
if (fieldSpec.indexOf('N') >= 0) fieldSet.add("narrative");
|
||||
|
||||
// set the parsing of quality queries into Lucene queries.
|
||||
QualityQueryParser qqParser = new SimpleQQParser("title", "body");
|
||||
QualityQueryParser qqParser = new SimpleQQParser(fieldSet.toArray(new String[0]), "body");
|
||||
|
||||
// run the benchmark
|
||||
QualityBenchmark qrun = new QualityBenchmark(qqs, qqParser, searcher, docNameField);
|
||||
|
|
|
@ -21,27 +21,38 @@ import org.apache.lucene.benchmark.quality.QualityQuery;
|
|||
import org.apache.lucene.benchmark.quality.QualityQueryParser;
|
||||
import org.apache.lucene.queryParser.ParseException;
|
||||
import org.apache.lucene.queryParser.QueryParser;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
/**
|
||||
* Simplistic quality query parser. A Lucene query is created by passing
|
||||
* the value of the specified QualityQuery name-value pair into
|
||||
* the value of the specified QualityQuery name-value pair(s) into
|
||||
* a Lucene's QueryParser using StandardAnalyzer. */
|
||||
public class SimpleQQParser implements QualityQueryParser {
|
||||
|
||||
private String qqName;
|
||||
private String qqNames[];
|
||||
private String indexField;
|
||||
ThreadLocal<QueryParser> queryParser = new ThreadLocal<QueryParser>();
|
||||
|
||||
/**
|
||||
* Constructor of a simple qq parser.
|
||||
* @param qqName name-value pairs of quality query to use for creating the query
|
||||
* @param indexField corresponding index field
|
||||
*/
|
||||
public SimpleQQParser(String qqNames[], String indexField) {
|
||||
this.qqNames = qqNames;
|
||||
this.indexField = indexField;
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructor of a simple qq parser.
|
||||
* @param qqName name-value pair of quality query to use for creating the query
|
||||
* @param indexField corresponding index field
|
||||
*/
|
||||
public SimpleQQParser(String qqName, String indexField) {
|
||||
this.qqName = qqName;
|
||||
this.indexField = indexField;
|
||||
this(new String[] { qqName }, indexField);
|
||||
}
|
||||
|
||||
/* (non-Javadoc)
|
||||
|
@ -53,7 +64,11 @@ public class SimpleQQParser implements QualityQueryParser {
|
|||
qp = new QueryParser(Version.LUCENE_CURRENT, indexField, new StandardAnalyzer(Version.LUCENE_CURRENT));
|
||||
queryParser.set(qp);
|
||||
}
|
||||
return qp.parse(qq.getValue(qqName));
|
||||
BooleanQuery bq = new BooleanQuery();
|
||||
for (int i = 0; i < qqNames.length; i++)
|
||||
bq.add(qp.parse(QueryParser.escape(qq.getValue(qqNames[i]))), BooleanClause.Occur.SHOULD);
|
||||
|
||||
return bq;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue