Remove custom QueryBuilder#analyzeGraphPhrase (#35983)

Now that https://issues.apache.org/jira/browse/LUCENE-8479 is fixed,
we can remove the custom implementation of QueryBuilder#analyzeGraphPhrase
from the QueryBuilder used by MatchQuery.
This commit is contained in:
Jim Ferenczi 2018-11-28 20:15:27 +01:00 committed by GitHub
parent 4b85769d24
commit 9ca3a06475
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 37 additions and 103 deletions

View File

@ -44,7 +44,6 @@ import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.QueryBuilder;
import org.apache.lucene.util.graph.GraphTokenStreamFiniteStrings;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
@ -58,9 +57,6 @@ import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.index.query.support.QueryParsers;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import static org.elasticsearch.common.lucene.search.Queries.newLenientFieldQuery;
import static org.elasticsearch.common.lucene.search.Queries.newUnmappedFieldQuery;
@ -358,8 +354,7 @@ public class MatchQuery {
return blendPhraseQuery((PhraseQuery) query, mapper);
}
return query;
}
catch (IllegalArgumentException | IllegalStateException e) {
} catch (IllegalArgumentException | IllegalStateException e) {
if (lenient) {
return newLenientFieldQuery(field, e);
}
@ -372,8 +367,7 @@ public class MatchQuery {
try {
checkForPositions(field);
return mapper.multiPhraseQuery(field, stream, slop, enablePositionIncrements);
}
catch (IllegalArgumentException | IllegalStateException e) {
} catch (IllegalArgumentException | IllegalStateException e) {
if (lenient) {
return newLenientFieldQuery(field, e);
}
@ -464,9 +458,9 @@ public class MatchQuery {
} else if (query instanceof SpanNearQuery) {
SpanNearQuery spanNearQuery = (SpanNearQuery) query;
SpanQuery[] clauses = spanNearQuery.getClauses();
if (clauses[clauses.length-1] instanceof SpanTermQuery) {
clauses[clauses.length-1] = new SpanMultiTermQueryWrapper<>(
new PrefixQuery(((SpanTermQuery) clauses[clauses.length-1]).getTerm())
if (clauses[clauses.length - 1] instanceof SpanTermQuery) {
clauses[clauses.length - 1] = new SpanMultiTermQueryWrapper<>(
new PrefixQuery(((SpanTermQuery) clauses[clauses.length - 1]).getTerm())
);
}
SpanNearQuery newQuery = new SpanNearQuery(clauses, spanNearQuery.getSlop(), spanNearQuery.isInOrder());
@ -508,82 +502,6 @@ public class MatchQuery {
}
return query;
}
/**
 * Overrides {@link QueryBuilder#analyzeGraphPhrase(TokenStream, String, int)} so that the
 * number of {@link SpanQuery} instances built from a token graph is capped by
 * {@link BooleanQuery#getMaxClauseCount()}.
 *
 * The graph is walked from one articulation point to the next. Each segment contributes a
 * single position query, and all position queries are finally combined in order.
 *
 * TODO Remove when https://issues.apache.org/jira/browse/LUCENE-8479 is fixed.
 *
 * @param source     the token stream to analyze; it is reset before being read
 * @param field      the field the generated span queries target
 * @param phraseSlop the slop applied to the enclosing {@link SpanNearQuery}
 * @return {@code null} when no position produced a query, the lone position query when there
 *         is exactly one, otherwise an in-order {@link SpanNearQuery} over all of them
 * @throws IOException if reading the token stream fails
 * @throws BooleanQuery.TooManyClauses if any generated list of clauses reaches the limit
 */
@Override
protected SpanQuery analyzeGraphPhrase(TokenStream source, String field, int phraseSlop) throws IOException {
    source.reset();
    final GraphTokenStreamFiniteStrings tokenGraph = new GraphTokenStreamFiniteStrings(source);
    final List<SpanQuery> positionQueries = new ArrayList<>();
    final int[] points = tokenGraph.articulationPoints();
    final int clauseLimit = BooleanQuery.getMaxClauseCount();

    int from = 0;
    // One extra iteration handles the tail segment that follows the last articulation point.
    for (int pointIdx = 0; pointIdx <= points.length; pointIdx++) {
        // -1 marks "until the end of the graph" for the trailing segment.
        final int to = pointIdx < points.length ? points[pointIdx] : -1;

        final SpanQuery positionQuery;
        if (tokenGraph.hasSidePath(from)) {
            // Several paths leave this state: build one span query per path and OR them together.
            final List<SpanQuery> alternatives = new ArrayList<>();
            for (Iterator<TokenStream> paths = tokenGraph.getFiniteStrings(from, to); paths.hasNext(); ) {
                final SpanQuery pathQuery = createSpanQuery(paths.next(), field);
                if (pathQuery == null) {
                    continue;
                }
                if (alternatives.size() >= clauseLimit) {
                    throw new BooleanQuery.TooManyClauses();
                }
                alternatives.add(pathQuery);
            }
            positionQuery = alternatives.isEmpty() ? null : new SpanOrQuery(alternatives.toArray(new SpanQuery[0]));
        } else {
            // No side path: every term at this state sits at the same position.
            final Term[] terms = tokenGraph.getTerms(field, from);
            assert terms.length > 0;
            if (terms.length >= clauseLimit) {
                throw new BooleanQuery.TooManyClauses();
            }
            if (terms.length == 1) {
                positionQuery = new SpanTermQuery(terms[0]);
            } else {
                final SpanTermQuery[] termQueries = new SpanTermQuery[terms.length];
                int slot = 0;
                for (Term term : terms) {
                    termQueries[slot++] = new SpanTermQuery(term);
                }
                positionQuery = new SpanOrQuery(termQueries);
            }
        }

        if (positionQuery != null) {
            if (positionQueries.size() >= clauseLimit) {
                throw new BooleanQuery.TooManyClauses();
            }
            positionQueries.add(positionQuery);
        }

        // The next segment starts where this one ended.
        from = to;
    }

    if (positionQueries.isEmpty()) {
        return null;
    }
    if (positionQueries.size() == 1) {
        return positionQueries.get(0);
    }
    return new SpanNearQuery(positionQueries.toArray(new SpanQuery[0]), phraseSlop, true);
}
}
/**

View File

@ -666,17 +666,23 @@ public class QueryStringQueryBuilderTests extends AbstractQueryTestCase<QueryStr
// span query with slop
query = queryParser.parse("\"that guinea pig smells\"~2");
expectedQuery = new SpanNearQuery.Builder(STRING_FIELD_NAME, true)
.addClause(new SpanTermQuery(new Term(STRING_FIELD_NAME, "that")))
.addClause(
new SpanOrQuery(
new SpanNearQuery.Builder(STRING_FIELD_NAME, true)
.addClause(new SpanTermQuery(new Term(STRING_FIELD_NAME, "guinea")))
.addClause(new SpanTermQuery(new Term(STRING_FIELD_NAME, "pig"))).build(),
new SpanTermQuery(new Term(STRING_FIELD_NAME, "cavy"))))
.addClause(new SpanTermQuery(new Term(STRING_FIELD_NAME, "smells")))
PhraseQuery pq1 = new PhraseQuery.Builder()
.add(new Term(STRING_FIELD_NAME, "that"))
.add(new Term(STRING_FIELD_NAME, "guinea"))
.add(new Term(STRING_FIELD_NAME, "pig"))
.add(new Term(STRING_FIELD_NAME, "smells"))
.setSlop(2)
.build();
PhraseQuery pq2 = new PhraseQuery.Builder()
.add(new Term(STRING_FIELD_NAME, "that"))
.add(new Term(STRING_FIELD_NAME, "cavy"))
.add(new Term(STRING_FIELD_NAME, "smells"))
.setSlop(2)
.build();
expectedQuery = new BooleanQuery.Builder()
.add(pq1, Occur.SHOULD)
.add(pq2, Occur.SHOULD)
.build();
assertThat(query, Matchers.equalTo(expectedQuery));
}
}

View File

@ -29,6 +29,7 @@ import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SynonymQuery;
@ -536,16 +537,25 @@ public class SimpleQueryStringBuilderTests extends AbstractQueryTestCase<SimpleQ
// phrase with slop
query = parser.parse("big \"tiny guinea pig\"~2");
PhraseQuery pq1 = new PhraseQuery.Builder()
.add(new Term(STRING_FIELD_NAME, "tiny"))
.add(new Term(STRING_FIELD_NAME, "guinea"))
.add(new Term(STRING_FIELD_NAME, "pig"))
.setSlop(2)
.build();
PhraseQuery pq2 = new PhraseQuery.Builder()
.add(new Term(STRING_FIELD_NAME, "tiny"))
.add(new Term(STRING_FIELD_NAME, "cavy"))
.setSlop(2)
.build();
expectedQuery = new BooleanQuery.Builder()
.add(new TermQuery(new Term(STRING_FIELD_NAME, "big")), defaultOp)
.add(new SpanNearQuery(new SpanQuery[] {
new SpanTermQuery(new Term(STRING_FIELD_NAME, "tiny")),
new SpanOrQuery(
new SpanNearQuery(new SpanQuery[] { span1, span2 }, 0, true),
new SpanTermQuery(new Term(STRING_FIELD_NAME, "cavy"))
)
}, 2, true), defaultOp)
.add(new BooleanQuery.Builder()
.add(pq1, BooleanClause.Occur.SHOULD)
.add(pq2, BooleanClause.Occur.SHOULD)
.build(),
defaultOp)
.build();
assertThat(query, equalTo(expectedQuery));
}