mirror of https://github.com/apache/lucene.git
LUCENE-7695: support synonyms in ComplexPhraseQueryParser
This commit is contained in:
parent
d8442070cf
commit
8a5492930e
|
@ -204,6 +204,9 @@ Improvements
|
|||
IndexInput description instead of plain IOException (Mike Drob via
|
||||
Mike McCandless)
|
||||
|
||||
* LUCENE-7695: ComplexPhraseQueryParser now supports query-time synonyms (Markus Jelsma
|
||||
via Mikhail Khludnev)
|
||||
|
||||
Optimizations
|
||||
|
||||
* LUCENE-7641: Optimized point range queries to compute documents that do not
|
||||
|
|
|
@ -28,6 +28,7 @@ import org.apache.lucene.index.Term;
|
|||
import org.apache.lucene.queryparser.classic.ParseException;
|
||||
import org.apache.lucene.queryparser.classic.QueryParser;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanClause.Occur;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.BoostQuery;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
|
@ -35,6 +36,7 @@ import org.apache.lucene.search.MatchNoDocsQuery;
|
|||
import org.apache.lucene.search.MultiTermQuery;
|
||||
import org.apache.lucene.search.MultiTermQuery.RewriteMethod;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.SynonymQuery;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.spans.SpanBoostQuery;
|
||||
import org.apache.lucene.search.spans.SpanNearQuery;
|
||||
|
@ -257,6 +259,7 @@ public class ComplexPhraseQueryParser extends QueryParser {
|
|||
// ArrayList spanClauses = new ArrayList();
|
||||
if (contents instanceof TermQuery
|
||||
|| contents instanceof MultiTermQuery
|
||||
|| contents instanceof SynonymQuery
|
||||
) {
|
||||
return contents;
|
||||
}
|
||||
|
@ -287,9 +290,11 @@ public class ComplexPhraseQueryParser extends QueryParser {
|
|||
qc = ((BoostQuery) qc).getQuery();
|
||||
}
|
||||
|
||||
if (qc instanceof BooleanQuery) {
|
||||
if (qc instanceof BooleanQuery || qc instanceof SynonymQuery) {
|
||||
ArrayList<SpanQuery> sc = new ArrayList<>();
|
||||
addComplexPhraseClause(sc, (BooleanQuery) qc);
|
||||
BooleanQuery booleanCaluse = qc instanceof BooleanQuery ?
|
||||
(BooleanQuery) qc : convert((SynonymQuery) qc);
|
||||
addComplexPhraseClause(sc, booleanCaluse);
|
||||
if (sc.size() > 0) {
|
||||
allSpanClauses[i] = sc.get(0);
|
||||
} else {
|
||||
|
@ -309,14 +314,14 @@ public class ComplexPhraseQueryParser extends QueryParser {
|
|||
if (qc instanceof TermQuery) {
|
||||
TermQuery tq = (TermQuery) qc;
|
||||
allSpanClauses[i] = new SpanTermQuery(tq.getTerm());
|
||||
} else {
|
||||
} else {
|
||||
throw new IllegalArgumentException("Unknown query type \""
|
||||
+ qc.getClass().getName()
|
||||
+ "\" found in phrase query string \""
|
||||
+ phrasedQueryStringContents + "\"");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
i += 1;
|
||||
}
|
||||
if (numNegatives == 0) {
|
||||
|
@ -354,6 +359,14 @@ public class ComplexPhraseQueryParser extends QueryParser {
|
|||
return snot;
|
||||
}
|
||||
|
||||
private BooleanQuery convert(SynonymQuery qc) {
|
||||
BooleanQuery.Builder bqb = new BooleanQuery.Builder();
|
||||
for (Term t : qc.getTerms()){
|
||||
bqb.add(new BooleanClause(new TermQuery(t), Occur.SHOULD));
|
||||
}
|
||||
return bqb.build();
|
||||
}
|
||||
|
||||
private void addComplexPhraseClause(List<SpanQuery> spanClauses, BooleanQuery qc) {
|
||||
ArrayList<SpanQuery> ors = new ArrayList<>();
|
||||
ArrayList<SpanQuery> nots = new ArrayList<>();
|
||||
|
|
|
@ -20,6 +20,7 @@ import java.util.HashSet;
|
|||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.analysis.MockSynonymAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
|
@ -39,7 +40,11 @@ public class TestComplexPhraseQuery extends LuceneTestCase {
|
|||
new DocData("john smith", "1", "developer"),
|
||||
new DocData("johathon smith", "2", "developer"),
|
||||
new DocData("john percival smith", "3", "designer"),
|
||||
new DocData("jackson waits tom", "4", "project manager")
|
||||
new DocData("jackson waits tom", "4", "project manager"),
|
||||
new DocData("johny perkins", "5", "orders pizza"),
|
||||
new DocData("hapax neverson", "6", "never matches"),
|
||||
new DocData("dog cigar", "7", "just for synonyms"),
|
||||
new DocData("dogs don't smoke cigarettes", "8", "just for synonyms"),
|
||||
};
|
||||
|
||||
private IndexSearcher searcher;
|
||||
|
@ -73,12 +78,30 @@ public class TestComplexPhraseQuery extends LuceneTestCase {
|
|||
}
|
||||
|
||||
// Checks quoted phrases that contain a single prefix, fuzzy, or range term,
// alone and combined with a second quoted phrase.
// The second argument of checkMatches is presumably the comma-separated ids
// of the DocData entries expected to match — TODO confirm against checkMatches.
public void testSingleTermPhrase() throws Exception {
|
||||
// NOTE(review): this query is asserted again below with "1,2,3,4,5";
// presumably the two lines are the removed/added pair of this diff — confirm
// which expectation is current.
checkMatches("\"joh*\" \"tom\"", "1,2,3,4");
|
||||
checkMatches("\"joh*\"","1,2,3,5");
|
||||
checkMatches("\"joh~\"","1,3,5");
|
||||
checkMatches("\"joh*\" \"tom\"", "1,2,3,4,5");
|
||||
// Both phrases are required here (leading '+').
checkMatches("+\"j*\" +\"tom\"", "4");
|
||||
// NOTE(review): same query as the next assertion but with a different
// expected list ("1,2,3" vs "1,2,3,5,8"); presumably another removed/added
// pair — confirm.
checkMatches("\"jo*\" \"[sma TO smZ]\" ", "1,2,3");
|
||||
checkMatches("\"jo*\" \"[sma TO smZ]\" ", "1,2,3,5,8");
|
||||
checkMatches("+\"j*hn\" +\"sm*h\"", "1,3");
|
||||
}
|
||||
|
||||
public void testSynonyms() throws Exception {
|
||||
checkMatches("\"dogs\"","8");
|
||||
MockSynonymAnalyzer synonym = new MockSynonymAnalyzer();
|
||||
checkMatches("\"dogs\"","7,8",synonym);
|
||||
// synonym is unidirectional
|
||||
checkMatches("\"dog\"","7",synonym);
|
||||
checkMatches("\"dogs cigar*\"","");
|
||||
checkMatches("\"dog cigar*\"","7");
|
||||
checkMatches("\"dogs cigar*\"","7", synonym);
|
||||
checkMatches("\"dog cigar*\"","7", synonym);
|
||||
checkMatches("\"dogs cigar*\"~2","7,8", synonym);
|
||||
// synonym is unidirectional
|
||||
checkMatches("\"dog cigar*\"~2","7", synonym);
|
||||
|
||||
}
|
||||
|
||||
public void testUnOrderedProximitySearches() throws Exception {
|
||||
|
||||
inOrder = true;
|
||||
|
@ -98,8 +121,13 @@ public class TestComplexPhraseQuery extends LuceneTestCase {
|
|||
}
|
||||
|
||||
private void checkMatches(String qString, String expectedVals)
|
||||
throws Exception {
|
||||
checkMatches(qString, expectedVals, analyzer);
|
||||
}
|
||||
|
||||
private void checkMatches(String qString, String expectedVals, Analyzer anAnalyzer)
|
||||
throws Exception {
|
||||
ComplexPhraseQueryParser qp = new ComplexPhraseQueryParser(defaultFieldName, analyzer);
|
||||
ComplexPhraseQueryParser qp = new ComplexPhraseQueryParser(defaultFieldName, anAnalyzer);
|
||||
qp.setInOrder(inOrder);
|
||||
qp.setFuzzyPrefixLength(1); // usually a good idea
|
||||
|
||||
|
|
Loading…
Reference in New Issue