LUCENE-8531: QueryBuilder#analyzeGraphPhrase now creates one phrase query per finite string in the graph if the slop is greater than 0.

Span queries cannot be used in this case because they don't handle slop the same way as phrase queries.
This commit is contained in:
Jim Ferenczi 2018-10-19 20:45:16 +02:00
parent 1a8188d92b
commit e1da5f9537
3 changed files with 39 additions and 5 deletions

View File

@ -208,6 +208,10 @@ Bug fixes:
* LUCENE-8522: throw InvalidShapeException when constructing a polygon and
all points are coplanar. (Ignacio Vera)
* LUCENE-8531: QueryBuilder#analyzeGraphPhrase now creates one phrase query per finite string
in the graph if the slop is greater than 0. Span queries cannot be used in this case because
they don't handle slop the same way as phrase queries. (Steve Rowe, Uwe Schindler, Jim Ferenczi)
New Features
* LUCENE-8496: Selective indexing - modify BKDReader/BKDWriter to allow users

View File

@ -16,7 +16,6 @@
*/
package org.apache.lucene.util;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
@ -542,13 +541,33 @@ public class QueryBuilder {
}
/**
* Creates a span near (phrase) query from a graph token stream. The articulation points of the graph are visited in
* order and the queries created at each point are merged in the returned near query.
* Creates graph phrase query from the tokenstream contents
*/
protected SpanQuery analyzeGraphPhrase(TokenStream source, String field, int phraseSlop)
protected Query analyzeGraphPhrase(TokenStream source, String field, int phraseSlop)
throws IOException {
source.reset();
GraphTokenStreamFiniteStrings graph = new GraphTokenStreamFiniteStrings(source);
if (phraseSlop > 0) {
/**
* Creates a boolean query from the graph token stream by extracting all the finite strings from the graph
* and using them to create phrase queries with the appropriate slop.
*/
BooleanQuery.Builder builder = new BooleanQuery.Builder();
Iterator<TokenStream> it = graph.getFiniteStrings();
while (it.hasNext()) {
Query query = createFieldQuery(it.next(), BooleanClause.Occur.MUST, field, true, phraseSlop);
if (query != null) {
builder.add(query, BooleanClause.Occur.SHOULD);
}
}
return builder.build();
}
/**
* Creates a span near (phrase) query from a graph token stream.
* The articulation points of the graph are visited in order and the queries
* created at each point are merged in the returned near query.
*/
List<SpanQuery> clauses = new ArrayList<>();
int[] articulationPoints = graph.articulationPoints();
int lastState = 0;
@ -610,7 +629,7 @@ public class QueryBuilder {
} else if (clauses.size() == 1) {
return clauses.get(0);
} else {
return new SpanNearQuery(clauses.toArray(new SpanQuery[0]), phraseSlop, true);
return new SpanNearQuery(clauses.toArray(new SpanQuery[0]), 0, true);
}
}

View File

@ -170,6 +170,17 @@ public class TestQueryBuilder extends LuceneTestCase {
queryBuilder.createPhraseQuery("field", "guinea pig"));
}
/**
 * Checks that a phrase query built with a non-zero slop over the input "guinea pig"
 * is a disjunction of two optional clauses: a sloppy phrase "guinea pig" and the
 * single term "cavy".
 * NOTE(review): the expected query implies MockSynonymAnalyzer emits "cavy" as a
 * synonym of "guinea pig" -- confirm against the analyzer.
 */
public void testMultiWordSynonymsPhraseWithSlop() throws Exception {
  // First alternative: the two-term phrase, carrying the slop passed to the builder.
  PhraseQuery.Builder phraseBuilder = new PhraseQuery.Builder();
  phraseBuilder.setSlop(4);
  phraseBuilder.add(new Term("field", "guinea"));
  phraseBuilder.add(new Term("field", "pig"));
  PhraseQuery guineaPigPhrase = phraseBuilder.build();

  // Second alternative: a single term, expected as a plain term query.
  TermQuery cavyTerm = new TermQuery(new Term("field", "cavy"));

  // Both alternatives are optional clauses of one boolean query.
  BooleanQuery.Builder disjunction = new BooleanQuery.Builder();
  disjunction.add(guineaPigPhrase, BooleanClause.Occur.SHOULD);
  disjunction.add(cavyTerm, BooleanClause.Occur.SHOULD);
  BooleanQuery expected = disjunction.build();

  QueryBuilder synonymAwareBuilder = new QueryBuilder(new MockSynonymAnalyzer());
  assertEquals(expected, synonymAwareBuilder.createPhraseQuery("field", "guinea pig", 4));
}
/** forms graph query */
public void testMultiWordSynonymsBoolean() throws Exception {
for (BooleanClause.Occur occur : new BooleanClause.Occur[] {BooleanClause.Occur.SHOULD, BooleanClause.Occur.MUST}) {