From e1da5f953731b4e2990e054d09ec0bcb2e5146b8 Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Fri, 19 Oct 2018 20:45:16 +0200 Subject: [PATCH] LUCENE-8531: QueryBuilder#analyzeGraphPhrase now creates one phrase query per finite string in the graph if the slop is greater than 0. Span queries cannot be used in this case because they don't handle slop the same way as phrase queries. --- lucene/CHANGES.txt | 4 +++ .../org/apache/lucene/util/QueryBuilder.java | 29 +++++++++++++++---- .../apache/lucene/util/TestQueryBuilder.java | 11 +++++++ 3 files changed, 39 insertions(+), 5 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 72c0d470f97..23d807b700f 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -208,6 +208,10 @@ Bug fixes: * LUCENE-8522: throw InvalidShapeException when constructing a polygon and all points are coplanar. (Ignacio Vera) +* LUCENE-8531: QueryBuilder#analyzeGraphPhrase now creates one phrase query per finite string + in the graph if the slop is greater than 0. Span queries cannot be used in this case because + they don't handle slop the same way as phrase queries. (Steve Rowe, Uwe Schindler, Jim Ferenczi) + New Features * LUCENE-8496: Selective indexing - modify BKDReader/BKDWriter to allow users diff --git a/lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java b/lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java index 37b7e3ed922..1b1c41449c5 100644 --- a/lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java +++ b/lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java @@ -16,7 +16,6 @@ */ package org.apache.lucene.util; - import java.io.IOException; import java.util.ArrayList; import java.util.Iterator; @@ -542,13 +541,33 @@ public class QueryBuilder { } /** - * Creates a span near (phrase) query from a graph token stream. The articulation points of the graph are visited in - * order and the queries created at each point are merged in the returned near query. 
+ * Creates a graph phrase query from the token stream contents */ - protected SpanQuery analyzeGraphPhrase(TokenStream source, String field, int phraseSlop) + protected Query analyzeGraphPhrase(TokenStream source, String field, int phraseSlop) throws IOException { source.reset(); GraphTokenStreamFiniteStrings graph = new GraphTokenStreamFiniteStrings(source); + if (phraseSlop > 0) { + /** + * Creates a boolean query from the graph token stream by extracting all the finite strings from the graph + * and using them to create phrase queries with the appropriate slop. + */ + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + Iterator it = graph.getFiniteStrings(); + while (it.hasNext()) { + Query query = createFieldQuery(it.next(), BooleanClause.Occur.MUST, field, true, phraseSlop); + if (query != null) { + builder.add(query, BooleanClause.Occur.SHOULD); + } + } + return builder.build(); + } + + /** + * Creates a span near (phrase) query from a graph token stream. + * The articulation points of the graph are visited in order and the queries + * created at each point are merged in the returned near query. 
+ */ List clauses = new ArrayList<>(); int[] articulationPoints = graph.articulationPoints(); int lastState = 0; @@ -610,7 +629,7 @@ public class QueryBuilder { } else if (clauses.size() == 1) { return clauses.get(0); } else { - return new SpanNearQuery(clauses.toArray(new SpanQuery[0]), phraseSlop, true); + return new SpanNearQuery(clauses.toArray(new SpanQuery[0]), 0, true); } } diff --git a/lucene/core/src/test/org/apache/lucene/util/TestQueryBuilder.java b/lucene/core/src/test/org/apache/lucene/util/TestQueryBuilder.java index 1604f519454..afc1238a80c 100644 --- a/lucene/core/src/test/org/apache/lucene/util/TestQueryBuilder.java +++ b/lucene/core/src/test/org/apache/lucene/util/TestQueryBuilder.java @@ -170,6 +170,17 @@ public class TestQueryBuilder extends LuceneTestCase { queryBuilder.createPhraseQuery("field", "guinea pig")); } + public void testMultiWordSynonymsPhraseWithSlop() throws Exception { + BooleanQuery expected = new BooleanQuery.Builder() + .add(new PhraseQuery.Builder().setSlop(4) + .add(new Term("field", "guinea")).add(new Term("field", "pig")).build(), BooleanClause.Occur.SHOULD) + .add(new TermQuery(new Term("field", "cavy")), BooleanClause.Occur.SHOULD) + .build(); + QueryBuilder queryBuilder = new QueryBuilder(new MockSynonymAnalyzer()); + assertEquals(expected, + queryBuilder.createPhraseQuery("field", "guinea pig", 4)); + } + /** forms graph query */ public void testMultiWordSynonymsBoolean() throws Exception { for (BooleanClause.Occur occur : new BooleanClause.Occur[] {BooleanClause.Occur.SHOULD, BooleanClause.Occur.MUST}) {