LUCENE-7695: support synonyms in ComplexPhraseQueryParser

This commit is contained in:
Mikhail Khludnev 2017-03-05 12:24:47 +03:00
parent d8442070cf
commit 8a5492930e
3 changed files with 52 additions and 8 deletions

View File

@ -204,6 +204,9 @@ Improvements
IndexInput description instead of plain IOException (Mike Drob via
Mike McCandless)
* LUCENE-7695: ComplexPhraseQueryParser now supports query-time synonyms (Markus Jelsma
via Mikhail Khludnev)
Optimizations
* LUCENE-7641: Optimized point range queries to compute documents that do not

View File

@ -28,6 +28,7 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.IndexSearcher;
@ -35,6 +36,7 @@ import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.MultiTermQuery.RewriteMethod;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SynonymQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.spans.SpanBoostQuery;
import org.apache.lucene.search.spans.SpanNearQuery;
@ -257,6 +259,7 @@ public class ComplexPhraseQueryParser extends QueryParser {
// ArrayList spanClauses = new ArrayList();
if (contents instanceof TermQuery
|| contents instanceof MultiTermQuery
|| contents instanceof SynonymQuery
) {
return contents;
}
@ -287,9 +290,11 @@ public class ComplexPhraseQueryParser extends QueryParser {
qc = ((BoostQuery) qc).getQuery();
}
if (qc instanceof BooleanQuery) {
if (qc instanceof BooleanQuery || qc instanceof SynonymQuery) {
ArrayList<SpanQuery> sc = new ArrayList<>();
addComplexPhraseClause(sc, (BooleanQuery) qc);
BooleanQuery booleanCaluse = qc instanceof BooleanQuery ?
(BooleanQuery) qc : convert((SynonymQuery) qc);
addComplexPhraseClause(sc, booleanCaluse);
if (sc.size() > 0) {
allSpanClauses[i] = sc.get(0);
} else {
@ -309,14 +314,14 @@ public class ComplexPhraseQueryParser extends QueryParser {
if (qc instanceof TermQuery) {
TermQuery tq = (TermQuery) qc;
allSpanClauses[i] = new SpanTermQuery(tq.getTerm());
} else {
} else {
throw new IllegalArgumentException("Unknown query type \""
+ qc.getClass().getName()
+ "\" found in phrase query string \""
+ phrasedQueryStringContents + "\"");
}
}
i += 1;
}
if (numNegatives == 0) {
@ -354,6 +359,14 @@ public class ComplexPhraseQueryParser extends QueryParser {
return snot;
}
/**
 * Rewrites a {@link SynonymQuery} as a {@link BooleanQuery} that holds one
 * optional ({@link Occur#SHOULD}) {@link TermQuery} clause per term reported
 * by {@link SynonymQuery#getTerms()}.
 *
 * @param qc the synonym query whose terms are copied into the boolean query
 * @return a newly built boolean query with one SHOULD clause per term
 */
private BooleanQuery convert(SynonymQuery qc) {
  final BooleanQuery.Builder disjunction = new BooleanQuery.Builder();
  for (final Term synonymTerm : qc.getTerms()) {
    disjunction.add(new TermQuery(synonymTerm), Occur.SHOULD);
  }
  return disjunction.build();
}
private void addComplexPhraseClause(List<SpanQuery> spanClauses, BooleanQuery qc) {
ArrayList<SpanQuery> ors = new ArrayList<>();
ArrayList<SpanQuery> nots = new ArrayList<>();

View File

@ -20,6 +20,7 @@ import java.util.HashSet;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockSynonymAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.DirectoryReader;
@ -39,7 +40,11 @@ public class TestComplexPhraseQuery extends LuceneTestCase {
new DocData("john smith", "1", "developer"),
new DocData("johathon smith", "2", "developer"),
new DocData("john percival smith", "3", "designer"),
new DocData("jackson waits tom", "4", "project manager")
new DocData("jackson waits tom", "4", "project manager"),
new DocData("johny perkins", "5", "orders pizza"),
new DocData("hapax neverson", "6", "never matches"),
new DocData("dog cigar", "7", "just for synonyms"),
new DocData("dogs don't smoke cigarettes", "8", "just for synonyms"),
};
private IndexSearcher searcher;
@ -73,12 +78,30 @@ public class TestComplexPhraseQuery extends LuceneTestCase {
}
// Exercises quoted single-term phrases containing wildcard, fuzzy and range terms.
public void testSingleTermPhrase() throws Exception {
// NOTE(review): this assertion and the one three lines below use the same query
// with different expected ids; presumably this is the pre-change expectation
// kept by the diff rendering — confirm which one applies.
checkMatches("\"joh*\" \"tom\"", "1,2,3,4");
checkMatches("\"joh*\"","1,2,3,5");
checkMatches("\"joh~\"","1,3,5");
checkMatches("\"joh*\" \"tom\"", "1,2,3,4,5");
// Both phrases are required here, so only the ids satisfying both are expected.
checkMatches("+\"j*\" +\"tom\"", "4");
// NOTE(review): same query as the next line with a shorter expected id list;
// looks like the superseded expectation — confirm.
checkMatches("\"jo*\" \"[sma TO smZ]\" ", "1,2,3");
checkMatches("\"jo*\" \"[sma TO smZ]\" ", "1,2,3,5,8");
checkMatches("+\"j*hn\" +\"sm*h\"", "1,3");
}
/**
 * Checks "dog"/"dogs" phrase queries with the default analyzer and with a
 * {@link MockSynonymAnalyzer} supplied at query time.
 */
public void testSynonyms() throws Exception {
  // Default analyzer: only the literal term is matched.
  checkMatches("\"dogs\"", "8");

  final Analyzer synonymAnalyzer = new MockSynonymAnalyzer();

  // With the synonym analyzer "dogs" is expected to hit both documents ...
  checkMatches("\"dogs\"", "7,8", synonymAnalyzer);
  // ... while "dog" does not expand the other way (synonym is unidirectional).
  checkMatches("\"dog\"", "7", synonymAnalyzer);

  // Two-term phrases ending in a prefix term, default analyzer.
  checkMatches("\"dogs cigar*\"", "");
  checkMatches("\"dog cigar*\"", "7");

  // Same phrases with the synonym analyzer.
  checkMatches("\"dogs cigar*\"", "7", synonymAnalyzer);
  checkMatches("\"dog cigar*\"", "7", synonymAnalyzer);

  // Same phrases with a slop of 2.
  checkMatches("\"dogs cigar*\"~2", "7,8", synonymAnalyzer);
  // synonym is unidirectional
  checkMatches("\"dog cigar*\"~2", "7", synonymAnalyzer);
}
public void testUnOrderedProximitySearches() throws Exception {
inOrder = true;
@ -98,8 +121,13 @@ public class TestComplexPhraseQuery extends LuceneTestCase {
}
/**
 * Convenience overload: checks {@code qString} against {@code expectedVals}
 * using this test's {@code analyzer} field, by delegating to the
 * three-argument {@code checkMatches}.
 */
private void checkMatches(String qString, String expectedVals) throws Exception {
  checkMatches(qString, expectedVals, analyzer);
}
private void checkMatches(String qString, String expectedVals, Analyzer anAnalyzer)
throws Exception {
ComplexPhraseQueryParser qp = new ComplexPhraseQueryParser(defaultFieldName, analyzer);
ComplexPhraseQueryParser qp = new ComplexPhraseQueryParser(defaultFieldName, anAnalyzer);
qp.setInOrder(inOrder);
qp.setFuzzyPrefixLength(1); // usually a good idea