mirror of https://github.com/apache/lucene.git
Initial commit of LUCENE-1486 - a subclass of the default QueryParser that overrides the parsing of PhraseQueries to allow more complex syntax e.g. wildcards in phrase queries
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@791579 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
28d65ceee7
commit
f7fa579971
|
@ -521,6 +521,12 @@ New features
|
||||||
All standard parsers now also implement Serializable and enforce
|
All standard parsers now also implement Serializable and enforce
|
||||||
their singleton status. (Uwe Schindler, Mike McCandless)
|
their singleton status. (Uwe Schindler, Mike McCandless)
|
||||||
|
|
||||||
|
31. LUCENE-1486: Added ComplexPhraseQueryParser, an extension of QueryParser
|
||||||
|
that allows a subset of the Lucene query language to be embedded in
|
||||||
|
PhraseQuerys. Wildcard, Range, and Fuzzy queries, as well as limited
|
||||||
|
boolean logic, can be used within quote operators with this parser, ie:
|
||||||
|
"(jo* -john) smyth~". (Mark Harwood via Mark Miller)
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
|
|
||||||
1. LUCENE-1427: Fixed QueryWrapperFilter to not waste time computing
|
1. LUCENE-1427: Fixed QueryWrapperFilter to not waste time computing
|
||||||
|
|
|
@ -0,0 +1,386 @@
|
||||||
|
package org.apache.lucene.queryParser;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Iterator;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
|
import org.apache.lucene.search.BooleanClause;
|
||||||
|
import org.apache.lucene.search.BooleanQuery;
|
||||||
|
import org.apache.lucene.search.Query;
|
||||||
|
import org.apache.lucene.search.TermQuery;
|
||||||
|
import org.apache.lucene.search.TermRangeQuery;
|
||||||
|
import org.apache.lucene.search.spans.SpanNearQuery;
|
||||||
|
import org.apache.lucene.search.spans.SpanNotQuery;
|
||||||
|
import org.apache.lucene.search.spans.SpanOrQuery;
|
||||||
|
import org.apache.lucene.search.spans.SpanQuery;
|
||||||
|
import org.apache.lucene.search.spans.SpanTermQuery;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* QueryParser which permits complex phrase query syntax e.g. "(john jon
|
||||||
|
* jonathan~) peters*"
|
||||||
|
*
|
||||||
|
* Performs potentially multiple passes over Query text to parse any nested
|
||||||
|
* logic in PhraseQueries. - First pass takes any PhraseQuery content between
|
||||||
|
* quotes and stores for subsequent pass. All other query content is parsed as
|
||||||
|
* normal - Second pass parses any stored PhraseQuery content, checking all
|
||||||
|
* embedded clauses are referring to the same field and therefore can be
|
||||||
|
* rewritten as Span queries. All PhraseQuery clauses are expressed as
|
||||||
|
* ComplexPhraseQuery objects
|
||||||
|
*
|
||||||
|
* This could arguably be done in one pass using a new QueryParser but here I am
|
||||||
|
* working within the constraints of the existing parser as a base class. This
|
||||||
|
* currently simply feeds all phrase content through an analyzer to select
|
||||||
|
* phrase terms - any "special" syntax such as * ~ * etc are not given special
|
||||||
|
* status
|
||||||
|
*
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public class ComplexPhraseQueryParser extends QueryParser {
|
||||||
|
private ArrayList/*<ComplexPhraseQuery>*/complexPhrases = null;
|
||||||
|
|
||||||
|
private boolean isPass2ResolvingPhrases;
|
||||||
|
|
||||||
|
private ComplexPhraseQuery currentPhraseQuery = null;
|
||||||
|
|
||||||
|
public ComplexPhraseQueryParser(String f, Analyzer a) {
|
||||||
|
super(f, a);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected Query getFieldQuery(String field, String queryText, int slop) {
|
||||||
|
ComplexPhraseQuery cpq = new ComplexPhraseQuery(field, queryText, slop);
|
||||||
|
complexPhrases.add(cpq); // add to list of phrases to be parsed once
|
||||||
|
// we
|
||||||
|
// are through with this pass
|
||||||
|
return cpq;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Query parse(String query) throws ParseException {
|
||||||
|
if (isPass2ResolvingPhrases) {
|
||||||
|
boolean oldConstantScoreRewriteSetting = getConstantScoreRewrite();
|
||||||
|
try {
|
||||||
|
// Temporarily set constantScoreRewrite to false so that Parser will
|
||||||
|
// generate visible
|
||||||
|
// collection of terms which we can convert into SpanQueries.
|
||||||
|
// ConstantScoreRewrite mode produces an
|
||||||
|
// opaque ConstantScoreQuery object which cannot be interrogated for
|
||||||
|
// terms in the same way a BooleanQuery can.
|
||||||
|
// QueryParser is not guaranteed threadsafe anyway so this temporary
|
||||||
|
// state change should not
|
||||||
|
// present an issue
|
||||||
|
setConstantScoreRewrite(false);
|
||||||
|
return super.parse(query);
|
||||||
|
} finally {
|
||||||
|
setConstantScoreRewrite(oldConstantScoreRewriteSetting);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// First pass - parse the top-level query recording any PhraseQuerys
|
||||||
|
// which will need to be resolved
|
||||||
|
complexPhrases = new ArrayList/*<ComplexPhraseQuery>*/();
|
||||||
|
Query q = super.parse(query);
|
||||||
|
|
||||||
|
// Perform second pass, using this QueryParser to parse any nested
|
||||||
|
// PhraseQueries with different
|
||||||
|
// set of syntax restrictions (i.e. all fields must be same)
|
||||||
|
isPass2ResolvingPhrases = true;
|
||||||
|
try {
|
||||||
|
for (Iterator iterator = complexPhrases.iterator(); iterator.hasNext();) {
|
||||||
|
currentPhraseQuery = (ComplexPhraseQuery) iterator.next();
|
||||||
|
// in each phrase, now parse the contents between quotes as a
|
||||||
|
// separate parse operation
|
||||||
|
currentPhraseQuery.parsePhraseElements(this);
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
isPass2ResolvingPhrases = false;
|
||||||
|
}
|
||||||
|
return q;
|
||||||
|
}
|
||||||
|
|
||||||
|
// There is No "getTermQuery throws ParseException" method to override so
|
||||||
|
// unfortunately need
|
||||||
|
// to throw a runtime exception here if a term for another field is embedded
|
||||||
|
// in phrase query
|
||||||
|
protected Query newTermQuery(Term term) {
|
||||||
|
if (isPass2ResolvingPhrases) {
|
||||||
|
try {
|
||||||
|
checkPhraseClauseIsForSameField(term.field());
|
||||||
|
} catch (ParseException pe) {
|
||||||
|
throw new RuntimeException("Error parsing complex phrase", pe);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return super.newTermQuery(term);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Helper method used to report on any clauses that appear in query syntax
|
||||||
|
private void checkPhraseClauseIsForSameField(String field)
|
||||||
|
throws ParseException {
|
||||||
|
if (!field.equals(currentPhraseQuery.field)) {
|
||||||
|
throw new ParseException("Cannot have clause for field \"" + field
|
||||||
|
+ "\" nested in phrase " + " for field \"" + currentPhraseQuery.field
|
||||||
|
+ "\"");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected Query getWildcardQuery(String field, String termStr)
|
||||||
|
throws ParseException {
|
||||||
|
if (isPass2ResolvingPhrases) {
|
||||||
|
checkPhraseClauseIsForSameField(field);
|
||||||
|
}
|
||||||
|
return super.getWildcardQuery(field, termStr);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected Query getRangeQuery(String field, String part1, String part2,
|
||||||
|
boolean inclusive) throws ParseException {
|
||||||
|
if (isPass2ResolvingPhrases) {
|
||||||
|
checkPhraseClauseIsForSameField(field);
|
||||||
|
}
|
||||||
|
return super.getRangeQuery(field, part1, part2, inclusive);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected Query newRangeQuery(String field, String part1, String part2,
|
||||||
|
boolean inclusive) {
|
||||||
|
if (isPass2ResolvingPhrases) {
|
||||||
|
// Must use old-style RangeQuery in order to produce a BooleanQuery
|
||||||
|
// that can be turned into SpanOr clause
|
||||||
|
TermRangeQuery rangeQuery = new TermRangeQuery(field, part1, part2, inclusive, inclusive,
|
||||||
|
getRangeCollator());
|
||||||
|
rangeQuery.setConstantScoreRewrite(false);;
|
||||||
|
return rangeQuery;
|
||||||
|
}
|
||||||
|
return super.newRangeQuery(field, part1, part2, inclusive);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected Query getFuzzyQuery(String field, String termStr,
|
||||||
|
float minSimilarity) throws ParseException {
|
||||||
|
if (isPass2ResolvingPhrases) {
|
||||||
|
checkPhraseClauseIsForSameField(field);
|
||||||
|
}
|
||||||
|
return super.getFuzzyQuery(field, termStr, minSimilarity);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Used to handle the query content in between quotes and produced Span-based
|
||||||
|
* interpretations of the clauses.
|
||||||
|
*/
|
||||||
|
static class ComplexPhraseQuery extends Query {
|
||||||
|
|
||||||
|
String field;
|
||||||
|
|
||||||
|
String phrasedQueryStringContents;
|
||||||
|
|
||||||
|
int slopFactor;
|
||||||
|
|
||||||
|
private Query contents;
|
||||||
|
|
||||||
|
public ComplexPhraseQuery(String field, String phrasedQueryStringContents,
|
||||||
|
int slopFactor) {
|
||||||
|
super();
|
||||||
|
this.field = field;
|
||||||
|
this.phrasedQueryStringContents = phrasedQueryStringContents;
|
||||||
|
this.slopFactor = slopFactor;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Called by ComplexPhraseQueryParser for each phrase after the main
|
||||||
|
// parse
|
||||||
|
// thread is through
|
||||||
|
protected void parsePhraseElements(QueryParser qp) throws ParseException {
|
||||||
|
// TODO ensure that field-sensitivity is preserved ie the query
|
||||||
|
// string below is parsed as
|
||||||
|
// field+":("+phrasedQueryStringContents+")"
|
||||||
|
// but this will need code in rewrite to unwrap the first layer of
|
||||||
|
// boolean query
|
||||||
|
contents = qp.parse(phrasedQueryStringContents);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Query rewrite(IndexReader reader) throws IOException {
|
||||||
|
// ArrayList spanClauses = new ArrayList();
|
||||||
|
if (contents instanceof TermQuery) {
|
||||||
|
return contents;
|
||||||
|
}
|
||||||
|
// Build a sequence of Span clauses arranged in a SpanNear - child
|
||||||
|
// clauses can be complex
|
||||||
|
// Booleans e.g. nots and ors etc
|
||||||
|
int numNegatives = 0;
|
||||||
|
if (!(contents instanceof BooleanQuery)) {
|
||||||
|
throw new IllegalArgumentException("Unknown query type \""
|
||||||
|
+ contents.getClass().getName()
|
||||||
|
+ "\" found in phrase query string \"" + phrasedQueryStringContents
|
||||||
|
+ "\"");
|
||||||
|
}
|
||||||
|
BooleanQuery bq = (BooleanQuery) contents;
|
||||||
|
BooleanClause[] bclauses = bq.getClauses();
|
||||||
|
SpanQuery[] allSpanClauses = new SpanQuery[bclauses.length];
|
||||||
|
// For all clauses e.g. one* two~
|
||||||
|
for (int i = 0; i < bclauses.length; i++) {
|
||||||
|
// HashSet bclauseterms=new HashSet();
|
||||||
|
Query qc = bclauses[i].getQuery();
|
||||||
|
// Rewrite this clause e.g one* becomes (one OR onerous)
|
||||||
|
qc = qc.rewrite(reader);
|
||||||
|
if (bclauses[i].getOccur().equals(BooleanClause.Occur.MUST_NOT)) {
|
||||||
|
numNegatives++;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (qc instanceof BooleanQuery) {
|
||||||
|
ArrayList sc = new ArrayList();
|
||||||
|
addComplexPhraseClause(sc, (BooleanQuery) qc);
|
||||||
|
if (sc.size() > 0) {
|
||||||
|
allSpanClauses[i] = (SpanQuery) sc.get(0);
|
||||||
|
} else {
|
||||||
|
// Insert fake term e.g. phrase query was for "Fred Smithe*" and
|
||||||
|
// there were no "Smithe*" terms - need to
|
||||||
|
// prevent match on just "Fred".
|
||||||
|
allSpanClauses[i] = new SpanTermQuery(new Term(field,
|
||||||
|
"Dummy clause because no terms found - must match nothing"));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (qc instanceof TermQuery) {
|
||||||
|
TermQuery tq = (TermQuery) qc;
|
||||||
|
allSpanClauses[i] = new SpanTermQuery(tq.getTerm());
|
||||||
|
} else {
|
||||||
|
throw new IllegalArgumentException("Unknown query type \""
|
||||||
|
+ qc.getClass().getName()
|
||||||
|
+ "\" found in phrase query string \""
|
||||||
|
+ phrasedQueryStringContents + "\"");
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (numNegatives == 0) {
|
||||||
|
// The simple case - no negative elements in phrase
|
||||||
|
return new SpanNearQuery(allSpanClauses, slopFactor, true);
|
||||||
|
}
|
||||||
|
// Complex case - we have mixed positives and negatives in the
|
||||||
|
// sequence.
|
||||||
|
// Need to return a SpanNotQuery
|
||||||
|
ArrayList positiveClauses = new ArrayList();
|
||||||
|
for (int j = 0; j < allSpanClauses.length; j++) {
|
||||||
|
if (!bclauses[j].getOccur().equals(BooleanClause.Occur.MUST_NOT)) {
|
||||||
|
positiveClauses.add(allSpanClauses[j]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
SpanQuery[] includeClauses = (SpanQuery[]) positiveClauses
|
||||||
|
.toArray(new SpanQuery[positiveClauses.size()]);
|
||||||
|
|
||||||
|
SpanQuery include = null;
|
||||||
|
if (includeClauses.length == 1) {
|
||||||
|
include = includeClauses[0]; // only one positive clause
|
||||||
|
} else {
|
||||||
|
// need to increase slop factor based on gaps introduced by
|
||||||
|
// negatives
|
||||||
|
include = new SpanNearQuery(includeClauses, slopFactor + numNegatives,
|
||||||
|
true);
|
||||||
|
}
|
||||||
|
// Use sequence of positive and negative values as the exclude.
|
||||||
|
SpanNearQuery exclude = new SpanNearQuery(allSpanClauses, slopFactor,
|
||||||
|
true);
|
||||||
|
SpanNotQuery snot = new SpanNotQuery(include, exclude);
|
||||||
|
return snot;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void addComplexPhraseClause(List spanClauses, BooleanQuery qc) {
|
||||||
|
ArrayList ors = new ArrayList();
|
||||||
|
ArrayList nots = new ArrayList();
|
||||||
|
BooleanClause[] bclauses = qc.getClauses();
|
||||||
|
|
||||||
|
// For all clauses e.g. one* two~
|
||||||
|
for (int i = 0; i < bclauses.length; i++) {
|
||||||
|
Query childQuery = bclauses[i].getQuery();
|
||||||
|
|
||||||
|
// select the list to which we will add these options
|
||||||
|
ArrayList chosenList = ors;
|
||||||
|
if (bclauses[i].getOccur() == BooleanClause.Occur.MUST_NOT) {
|
||||||
|
chosenList = nots;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (childQuery instanceof TermQuery) {
|
||||||
|
TermQuery tq = (TermQuery) childQuery;
|
||||||
|
SpanTermQuery stq = new SpanTermQuery(tq.getTerm());
|
||||||
|
stq.setBoost(tq.getBoost());
|
||||||
|
chosenList.add(stq);
|
||||||
|
} else if (childQuery instanceof BooleanQuery) {
|
||||||
|
BooleanQuery cbq = (BooleanQuery) childQuery;
|
||||||
|
addComplexPhraseClause(chosenList, cbq);
|
||||||
|
} else {
|
||||||
|
// TODO alternatively could call extract terms here?
|
||||||
|
throw new IllegalArgumentException("Unknown query type:"
|
||||||
|
+ childQuery.getClass().getName());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (ors.size() == 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
SpanOrQuery soq = new SpanOrQuery((SpanQuery[]) ors
|
||||||
|
.toArray(new SpanQuery[ors.size()]));
|
||||||
|
if (nots.size() == 0) {
|
||||||
|
spanClauses.add(soq);
|
||||||
|
} else {
|
||||||
|
SpanOrQuery snqs = new SpanOrQuery((SpanQuery[]) nots
|
||||||
|
.toArray(new SpanQuery[nots.size()]));
|
||||||
|
SpanNotQuery snq = new SpanNotQuery(soq, snqs);
|
||||||
|
spanClauses.add(snq);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString(String field) {
|
||||||
|
return "\"" + phrasedQueryStringContents + "\"";
|
||||||
|
}
|
||||||
|
|
||||||
|
public int hashCode() {
|
||||||
|
final int prime = 31;
|
||||||
|
int result = 1;
|
||||||
|
result = prime * result + ((field == null) ? 0 : field.hashCode());
|
||||||
|
result = prime
|
||||||
|
* result
|
||||||
|
+ ((phrasedQueryStringContents == null) ? 0
|
||||||
|
: phrasedQueryStringContents.hashCode());
|
||||||
|
result = prime * result + slopFactor;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean equals(Object obj) {
|
||||||
|
if (this == obj)
|
||||||
|
return true;
|
||||||
|
if (obj == null)
|
||||||
|
return false;
|
||||||
|
if (getClass() != obj.getClass())
|
||||||
|
return false;
|
||||||
|
ComplexPhraseQuery other = (ComplexPhraseQuery) obj;
|
||||||
|
if (field == null) {
|
||||||
|
if (other.field != null)
|
||||||
|
return false;
|
||||||
|
} else if (!field.equals(other.field))
|
||||||
|
return false;
|
||||||
|
if (phrasedQueryStringContents == null) {
|
||||||
|
if (other.phrasedQueryStringContents != null)
|
||||||
|
return false;
|
||||||
|
} else if (!phrasedQueryStringContents
|
||||||
|
.equals(other.phrasedQueryStringContents))
|
||||||
|
return false;
|
||||||
|
if (slopFactor != other.slopFactor)
|
||||||
|
return false;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,143 @@
|
||||||
|
package org.apache.lucene.queryParser;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.util.HashSet;
|
||||||
|
|
||||||
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
|
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||||
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.document.Field;
|
||||||
|
import org.apache.lucene.index.IndexWriter;
|
||||||
|
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
|
||||||
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
|
import org.apache.lucene.search.Query;
|
||||||
|
import org.apache.lucene.search.ScoreDoc;
|
||||||
|
import org.apache.lucene.search.TopDocs;
|
||||||
|
import org.apache.lucene.store.RAMDirectory;
|
||||||
|
|
||||||
|
public class TestComplexPhraseQuery extends TestCase {
|
||||||
|
|
||||||
|
Analyzer analyzer = new StandardAnalyzer();
|
||||||
|
|
||||||
|
DocData docsContent[] = { new DocData("john smith", "1"),
|
||||||
|
new DocData("johathon smith", "2"),
|
||||||
|
new DocData("john percival smith", "3"),
|
||||||
|
new DocData("jackson waits tom", "4") };
|
||||||
|
|
||||||
|
private IndexSearcher searcher;
|
||||||
|
|
||||||
|
String defaultFieldName = "name";
|
||||||
|
|
||||||
|
public void testComplexPhrases() throws Exception {
|
||||||
|
checkMatches("\"john smith\"", "1"); // Simple multi-term still works
|
||||||
|
checkMatches("\"j* smyth~\"", "1,2"); // wildcards and fuzzies are OK in
|
||||||
|
// phrases
|
||||||
|
checkMatches("\"(jo* -john) smith\"", "2"); // boolean logic works
|
||||||
|
checkMatches("\"jo* smith\"~2", "1,2,3"); // position logic works.
|
||||||
|
checkMatches("\"jo* [sma TO smZ]\" ", "1,2"); // range queries supported
|
||||||
|
checkMatches("\"john\"", "1,3"); // Simple single-term still works
|
||||||
|
checkMatches("\"(john OR johathon) smith\"", "1,2"); // boolean logic with
|
||||||
|
// brackets works.
|
||||||
|
checkMatches("\"(jo* -john) smyth~\"", "2"); // boolean logic with
|
||||||
|
// brackets works.
|
||||||
|
|
||||||
|
// checkMatches("\"john -percival\"", "1"); // not logic doesn't work
|
||||||
|
// currently :(.
|
||||||
|
|
||||||
|
checkMatches("\"john nosuchword*\"", ""); // phrases with clauses producing
|
||||||
|
// empty sets
|
||||||
|
|
||||||
|
checkBadQuery("\"jo* id:1 smith\""); // mixing fields in a phrase is bad
|
||||||
|
checkBadQuery("\"jo* \"smith\" \""); // phrases inside phrases is bad
|
||||||
|
}
|
||||||
|
|
||||||
|
private void checkBadQuery(String qString) {
|
||||||
|
QueryParser qp = new ComplexPhraseQueryParser(defaultFieldName, analyzer);
|
||||||
|
Throwable expected = null;
|
||||||
|
try {
|
||||||
|
qp.parse(qString);
|
||||||
|
} catch (Throwable e) {
|
||||||
|
expected = e;
|
||||||
|
}
|
||||||
|
assertNotNull("Expected parse error in " + qString, expected);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private void checkMatches(String qString, String expectedVals)
|
||||||
|
throws Exception {
|
||||||
|
QueryParser qp = new ComplexPhraseQueryParser(defaultFieldName, analyzer);
|
||||||
|
qp.setFuzzyPrefixLength(1); // usually a good idea
|
||||||
|
|
||||||
|
Query q = qp.parse(qString);
|
||||||
|
|
||||||
|
HashSet expecteds = new HashSet();
|
||||||
|
String[] vals = expectedVals.split(",");
|
||||||
|
for (int i = 0; i < vals.length; i++) {
|
||||||
|
if (vals[i].length() > 0)
|
||||||
|
expecteds.add(vals[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
TopDocs td = searcher.search(q, 10);
|
||||||
|
ScoreDoc[] sd = td.scoreDocs;
|
||||||
|
for (int i = 0; i < sd.length; i++) {
|
||||||
|
Document doc = searcher.doc(sd[i].doc);
|
||||||
|
String id = doc.get("id");
|
||||||
|
assertTrue(qString + "matched doc#" + id + " not expected", expecteds
|
||||||
|
.contains(id));
|
||||||
|
expecteds.remove(id);
|
||||||
|
}
|
||||||
|
|
||||||
|
assertEquals(qString + " missing some matches ", 0, expecteds.size());
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void setUp() throws Exception {
|
||||||
|
RAMDirectory rd = new RAMDirectory();
|
||||||
|
IndexWriter w = new IndexWriter(rd, analyzer, MaxFieldLength.UNLIMITED);
|
||||||
|
for (int i = 0; i < docsContent.length; i++) {
|
||||||
|
Document doc = new Document();
|
||||||
|
doc.add(new Field("name", docsContent[i].name, Field.Store.YES,
|
||||||
|
Field.Index.ANALYZED));
|
||||||
|
doc.add(new Field("id", docsContent[i].id, Field.Store.YES,
|
||||||
|
Field.Index.ANALYZED));
|
||||||
|
w.addDocument(doc);
|
||||||
|
}
|
||||||
|
w.close();
|
||||||
|
searcher = new IndexSearcher(rd);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void tearDown() throws Exception {
|
||||||
|
searcher.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
static class DocData {
|
||||||
|
String name;
|
||||||
|
|
||||||
|
String id;
|
||||||
|
|
||||||
|
public DocData(String name, String id) {
|
||||||
|
super();
|
||||||
|
this.name = name;
|
||||||
|
this.id = id;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
Loading…
Reference in New Issue