diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index b067fde0d86..a8f7ee48258 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -204,6 +204,9 @@ Improvements
   IndexInput description instead of plain IOException (Mike Drob via
   Mike McCandless)
 
+* LUCENE-7695: ComplexPhraseQueryParser to support query time synonyms (Markus Jelsma
+  via Mikhail Khludnev)
+
 Optimizations
 
 * LUCENE-7641: Optimized point range queries to compute documents that do not
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/complexPhrase/ComplexPhraseQueryParser.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/complexPhrase/ComplexPhraseQueryParser.java
index 6e18960f40a..32f4fb31e8c 100644
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/complexPhrase/ComplexPhraseQueryParser.java
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/complexPhrase/ComplexPhraseQueryParser.java
@@ -28,6 +28,7 @@ import org.apache.lucene.index.Term;
 import org.apache.lucene.queryparser.classic.ParseException;
 import org.apache.lucene.queryparser.classic.QueryParser;
 import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.BoostQuery;
 import org.apache.lucene.search.IndexSearcher;
@@ -35,6 +36,7 @@ import org.apache.lucene.search.MatchNoDocsQuery;
 import org.apache.lucene.search.MultiTermQuery;
 import org.apache.lucene.search.MultiTermQuery.RewriteMethod;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.search.SynonymQuery;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.spans.SpanBoostQuery;
 import org.apache.lucene.search.spans.SpanNearQuery;
@@ -257,6 +259,7 @@ public class ComplexPhraseQueryParser extends QueryParser {
       // ArrayList spanClauses = new ArrayList();
       if (contents instanceof TermQuery
           || contents instanceof MultiTermQuery
+          || contents instanceof SynonymQuery
           ) {
         return contents;
       }
@@ -287,9 +290,11 @@ public class ComplexPhraseQueryParser extends QueryParser {
           qc = ((BoostQuery) qc).getQuery();
         }
 
-        if (qc instanceof BooleanQuery) {
+        if (qc instanceof BooleanQuery || qc instanceof SynonymQuery) {
           ArrayList sc = new ArrayList<>();
-          addComplexPhraseClause(sc, (BooleanQuery) qc);
+          BooleanQuery booleanClause = qc instanceof BooleanQuery ?
+              (BooleanQuery) qc : convert((SynonymQuery) qc);
+          addComplexPhraseClause(sc, booleanClause);
           if (sc.size() > 0) {
             allSpanClauses[i] = sc.get(0);
           } else {
@@ -309,14 +314,14 @@ public class ComplexPhraseQueryParser extends QueryParser {
           if (qc instanceof TermQuery) {
             TermQuery tq = (TermQuery) qc;
             allSpanClauses[i] = new SpanTermQuery(tq.getTerm());
           } else {
             throw new IllegalArgumentException("Unknown query type \""
                 + qc.getClass().getName()
                 + "\" found in phrase query string \""
                 + phrasedQueryStringContents + "\"");
           }
-
         }
+
         i += 1;
       }
       if (numNegatives == 0) {
@@ -354,6 +359,19 @@ public class ComplexPhraseQueryParser extends QueryParser {
       return snot;
     }
 
+    /**
+     * Expands a {@link SynonymQuery} into a {@link BooleanQuery} holding one
+     * {@code SHOULD} {@link TermQuery} clause per synonym term, so that it can be
+     * handed to {@code addComplexPhraseClause} like any other boolean clause.
+     */
+    private BooleanQuery convert(SynonymQuery qc) {
+      BooleanQuery.Builder bqb = new BooleanQuery.Builder();
+      for (Term t : qc.getTerms()) {
+        bqb.add(new BooleanClause(new TermQuery(t), Occur.SHOULD));
+      }
+      return bqb.build();
+    }
+
     private void addComplexPhraseClause(List spanClauses, BooleanQuery qc) {
       ArrayList ors = new ArrayList<>();
       ArrayList nots = new ArrayList<>();
diff --git a/lucene/queryparser/src/test/org/apache/lucene/queryparser/complexPhrase/TestComplexPhraseQuery.java b/lucene/queryparser/src/test/org/apache/lucene/queryparser/complexPhrase/TestComplexPhraseQuery.java
index 28b600ba037..5c45e280774 100644
--- a/lucene/queryparser/src/test/org/apache/lucene/queryparser/complexPhrase/TestComplexPhraseQuery.java
+++ b/lucene/queryparser/src/test/org/apache/lucene/queryparser/complexPhrase/TestComplexPhraseQuery.java
@@ -20,6 +20,7 @@ import java.util.HashSet;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockSynonymAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.DirectoryReader;
@@ -39,7 +40,11 @@ public class TestComplexPhraseQuery extends LuceneTestCase {
       new DocData("john smith", "1", "developer"),
       new DocData("johathon smith", "2", "developer"),
       new DocData("john percival smith", "3", "designer"),
-      new DocData("jackson waits tom", "4", "project manager")
+      new DocData("jackson waits tom", "4", "project manager"),
+      new DocData("johny perkins", "5", "orders pizza"),
+      new DocData("hapax neverson", "6", "never matches"),
+      new DocData("dog cigar", "7", "just for synonyms"),
+      new DocData("dogs don't smoke cigarettes", "8", "just for synonyms"),
   };
 
   private IndexSearcher searcher;
@@ -73,12 +78,30 @@ public class TestComplexPhraseQuery extends LuceneTestCase {
   }
 
   public void testSingleTermPhrase() throws Exception {
-    checkMatches("\"joh*\" \"tom\"", "1,2,3,4");
+    checkMatches("\"joh*\"","1,2,3,5");
+    checkMatches("\"joh~\"","1,3,5");
+    checkMatches("\"joh*\" \"tom\"", "1,2,3,4,5");
     checkMatches("+\"j*\" +\"tom\"", "4");
-    checkMatches("\"jo*\" \"[sma TO smZ]\" ", "1,2,3");
+    checkMatches("\"jo*\" \"[sma TO smZ]\" ", "1,2,3,5,8");
     checkMatches("+\"j*hn\" +\"sm*h\"", "1,3");
   }
 
+  public void testSynonyms() throws Exception {
+    checkMatches("\"dogs\"","8");
+    MockSynonymAnalyzer synonym = new MockSynonymAnalyzer();
+    checkMatches("\"dogs\"","7,8",synonym);
+    // synonym is unidirectional
+    checkMatches("\"dog\"","7",synonym);
+    checkMatches("\"dogs cigar*\"","");
+    checkMatches("\"dog cigar*\"","7");
+    checkMatches("\"dogs cigar*\"","7", synonym);
+    checkMatches("\"dog cigar*\"","7", synonym);
+    checkMatches("\"dogs cigar*\"~2","7,8", synonym);
+    // synonym is unidirectional
+    checkMatches("\"dog cigar*\"~2","7", synonym);
+
+  }
+
   public void testUnOrderedProximitySearches() throws Exception {
 
     inOrder = true;
@@ -98,8 +121,13 @@ public class TestComplexPhraseQuery extends LuceneTestCase {
   }
 
   private void checkMatches(String qString, String expectedVals)
+  throws Exception {
+    checkMatches(qString, expectedVals, analyzer);
+  }
+
+  private void checkMatches(String qString, String expectedVals, Analyzer anAnalyzer)
       throws Exception {
-    ComplexPhraseQueryParser qp = new ComplexPhraseQueryParser(defaultFieldName, analyzer);
+    ComplexPhraseQueryParser qp = new ComplexPhraseQueryParser(defaultFieldName, anAnalyzer);
     qp.setInOrder(inOrder);
     qp.setFuzzyPrefixLength(1); // usually a good idea