LUCENE-8531: QueryBuilder#analyzeGraphPhrase now creates one phrase query per finite string in the graph if the slop is greater than 0.

Span queries cannot be used in this case because they don't handle slop the same way as phrase queries.
This commit is contained in:
Jim Ferenczi 2018-10-19 20:45:16 +02:00
parent 1a8188d92b
commit e1da5f9537
3 changed files with 39 additions and 5 deletions

View File

@ -208,6 +208,10 @@ Bug fixes:
* LUCENE-8522: throw InvalidShapeException when constructing a polygon and * LUCENE-8522: throw InvalidShapeException when constructing a polygon and
all points are coplanar. (Ignacio Vera) all points are coplanar. (Ignacio Vera)
* LUCENE-8531: QueryBuilder#analyzeGraphPhrase now creates one phrase query per finite string
in the graph if the slop is greater than 0. Span queries cannot be used in this case because
they don't handle slop the same way as phrase queries. (Steve Rowe, Uwe Schindler, Jim Ferenczi)
New Features New Features
* LUCENE-8496: Selective indexing - modify BKDReader/BKDWriter to allow users * LUCENE-8496: Selective indexing - modify BKDReader/BKDWriter to allow users

View File

@ -16,7 +16,6 @@
*/ */
package org.apache.lucene.util; package org.apache.lucene.util;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Iterator; import java.util.Iterator;
@ -542,13 +541,33 @@ public class QueryBuilder {
} }
/** /**
* Creates a span near (phrase) query from a graph token stream. The articulation points of the graph are visited in * Creates graph phrase query from the tokenstream contents
* order and the queries created at each point are merged in the returned near query.
*/ */
protected SpanQuery analyzeGraphPhrase(TokenStream source, String field, int phraseSlop) protected Query analyzeGraphPhrase(TokenStream source, String field, int phraseSlop)
throws IOException { throws IOException {
source.reset(); source.reset();
GraphTokenStreamFiniteStrings graph = new GraphTokenStreamFiniteStrings(source); GraphTokenStreamFiniteStrings graph = new GraphTokenStreamFiniteStrings(source);
if (phraseSlop > 0) {
/**
* Creates a boolean query from the graph token stream by extracting all the finite strings from the graph
* and using them to create phrase queries with the appropriate slop.
*/
BooleanQuery.Builder builder = new BooleanQuery.Builder();
Iterator<TokenStream> it = graph.getFiniteStrings();
while (it.hasNext()) {
Query query = createFieldQuery(it.next(), BooleanClause.Occur.MUST, field, true, phraseSlop);
if (query != null) {
builder.add(query, BooleanClause.Occur.SHOULD);
}
}
return builder.build();
}
/**
* Creates a span near (phrase) query from a graph token stream.
* The articulation points of the graph are visited in order and the queries
* created at each point are merged in the returned near query.
*/
List<SpanQuery> clauses = new ArrayList<>(); List<SpanQuery> clauses = new ArrayList<>();
int[] articulationPoints = graph.articulationPoints(); int[] articulationPoints = graph.articulationPoints();
int lastState = 0; int lastState = 0;
@ -610,7 +629,7 @@ public class QueryBuilder {
} else if (clauses.size() == 1) { } else if (clauses.size() == 1) {
return clauses.get(0); return clauses.get(0);
} else { } else {
return new SpanNearQuery(clauses.toArray(new SpanQuery[0]), phraseSlop, true); return new SpanNearQuery(clauses.toArray(new SpanQuery[0]), 0, true);
} }
} }

View File

@ -170,6 +170,17 @@ public class TestQueryBuilder extends LuceneTestCase {
queryBuilder.createPhraseQuery("field", "guinea pig")); queryBuilder.createPhraseQuery("field", "guinea pig"));
} }
/** a sloppy phrase over a synonym graph becomes a disjunction with one clause per path */
public void testMultiWordSynonymsPhraseWithSlop() throws Exception {
  // Path 1: the original two-word phrase, carrying the requested slop of 4.
  PhraseQuery sloppyGuineaPig = new PhraseQuery.Builder()
      .setSlop(4)
      .add(new Term("field", "guinea"))
      .add(new Term("field", "pig"))
      .build();
  // Path 2: the single-term alternative expected from MockSynonymAnalyzer.
  TermQuery cavy = new TermQuery(new Term("field", "cavy"));

  BooleanQuery.Builder disjunction = new BooleanQuery.Builder();
  disjunction.add(sloppyGuineaPig, BooleanClause.Occur.SHOULD);
  disjunction.add(cavy, BooleanClause.Occur.SHOULD);
  BooleanQuery expected = disjunction.build();

  QueryBuilder queryBuilder = new QueryBuilder(new MockSynonymAnalyzer());
  assertEquals(expected, queryBuilder.createPhraseQuery("field", "guinea pig", 4));
}
/** forms graph query */ /** forms graph query */
public void testMultiWordSynonymsBoolean() throws Exception { public void testMultiWordSynonymsBoolean() throws Exception {
for (BooleanClause.Occur occur : new BooleanClause.Occur[] {BooleanClause.Occur.SHOULD, BooleanClause.Occur.MUST}) { for (BooleanClause.Occur occur : new BooleanClause.Occur[] {BooleanClause.Occur.SHOULD, BooleanClause.Occur.MUST}) {