Clean up the Lucene utility class.

- removes Lucene.count(IndexSearcher,Query) in favor of IndexSearcher.count(Query)
- removes EarlyTerminatingCollector.reset(): reusing Collector objects does not
  help given that query execution needs to allocate objects (weights, scorers)
  anyway
- adds unit tests for Lucene.exists
This commit is contained in:
Adrien Grand 2015-10-09 10:01:14 +02:00
parent 5c43dc501a
commit 275da4821e
10 changed files with 124 additions and 229 deletions

View File

@ -174,7 +174,12 @@ public class TransportExistsAction extends TransportBroadcastAction<ExistsReques
}
context.preProcess();
try {
boolean exists = Lucene.exists(context, context.query(), Lucene.createExistsCollector());
boolean exists;
try {
exists = Lucene.exists(context.searcher(), context.query());
} finally {
context.clearReleasables(SearchContext.Lifetime.COLLECTION);
}
return new ShardExistsResponse(request.shardId(), exists);
} catch (Exception e) {
throw new QueryPhaseExecutionException(context, "failed to execute exists", e);

View File

@ -46,14 +46,11 @@ import org.elasticsearch.common.util.iterable.Iterables;
import org.elasticsearch.index.analysis.AnalyzerScope;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.search.internal.SearchContext;
import java.io.IOException;
import java.text.ParseException;
import java.util.*;
import static org.elasticsearch.common.lucene.search.NoopCollector.NOOP_COLLECTOR;
/**
*
*/
@ -229,27 +226,6 @@ public class Lucene {
}.run();
}
/**
 * Counts the documents matching {@code query} on {@code searcher}.
 *
 * Thin delegate to {@link IndexSearcher#count(Query)}, kept for the
 * {@code long} return type existing callers expect.
 */
public static long count(IndexSearcher searcher, Query query) throws IOException {
    return searcher.count(query);
}
/**
 * Performs a count on the <code>searcher</code> for <code>query</code>,
 * terminating early once the count reaches <code>terminateAfterCount</code>.
 *
 * @param searcher            searcher to run the query on
 * @param query               query whose matches are counted
 * @param terminateAfterCount threshold at which counting stops early
 * @return the number of hits collected (at most <code>terminateAfterCount</code>)
 */
public static long count(IndexSearcher searcher, Query query, int terminateAfterCount) throws IOException {
    EarlyTerminatingCollector countCollector = createCountBasedEarlyTerminatingCollector(terminateAfterCount);
    countWithEarlyTermination(searcher, query, countCollector);
    return countCollector.count();
}
/**
 * Creates a count-based early terminating collector with a threshold of
 * <code>maxCountHits</code>.
 *
 * @param maxCountHits maximum number of hits to collect before terminating
 * @return a fresh {@link EarlyTerminatingCollector}
 */
// fix: canonical modifier order is "public static final" (JLS 8.4.3),
// not "public final static"
public static final EarlyTerminatingCollector createCountBasedEarlyTerminatingCollector(int maxCountHits) {
    return new EarlyTerminatingCollector(maxCountHits);
}
/**
* Wraps <code>delegate</code> with count based early termination collector with a threshold of <code>maxCountHits</code>
*/
@ -265,99 +241,27 @@ public class Lucene {
}
/**
* Performs an exists (count &gt; 0) query on the <code>searcher</code> for <code>query</code>
* with <code>filter</code> using the given <code>collector</code>
*
* The <code>collector</code> can be instantiated using <code>Lucene.createExistsCollector()</code>
* Check whether there is one or more documents matching the provided query.
*/
/**
 * Performs an exists (count &gt; 0) check on the <code>searcher</code> for
 * <code>query</code> restricted by <code>filter</code>.
 *
 * The <code>collector</code> can be instantiated using
 * <code>Lucene.createExistsCollector()</code>; it is reset before use so it
 * may be reused across calls.
 */
public static boolean exists(IndexSearcher searcher, Query query, Filter filter,
                             EarlyTerminatingCollector collector) throws IOException {
    collector.reset(); // discard state from any previous run
    countWithEarlyTermination(searcher, filter, query, collector);
    return collector.exists();
}
/**
 * Performs an exists (count &gt; 0) check on the <code>searcher</code> for
 * <code>query</code>, without a filter.
 *
 * The <code>collector</code> can be instantiated using
 * <code>Lucene.createExistsCollector()</code>; it is reset before use so it
 * may be reused across calls.
 */
public static boolean exists(IndexSearcher searcher, Query query, EarlyTerminatingCollector collector) throws IOException {
    collector.reset(); // discard state from any previous run
    countWithEarlyTermination(searcher, query, collector);
    return collector.exists();
}
/**
 * Convenience overload: calls
 * <code>countWithEarlyTermination(searcher, null, query, collector)</code>,
 * i.e. a count with early termination and no filter.
 */
public static boolean countWithEarlyTermination(IndexSearcher searcher, Query query,
                                                EarlyTerminatingCollector collector) throws IOException {
    return countWithEarlyTermination(searcher, null, query, collector);
}
/**
* Performs a count on <code>query</code> and <code>filter</code> with early termination using <code>searcher</code>.
* The early termination threshold is specified by the provided <code>collector</code>
*/
public static boolean countWithEarlyTermination(IndexSearcher searcher, Filter filter, Query query,
EarlyTerminatingCollector collector) throws IOException {
try {
if (filter == null) {
searcher.search(query, collector);
} else {
searcher.search(query, filter, collector);
public static boolean exists(IndexSearcher searcher, Query query) throws IOException {
final Weight weight = searcher.createNormalizedWeight(query, false);
// the scorer API should be more efficient at stopping after the first
// match than the bulk scorer API
for (LeafReaderContext context : searcher.getIndexReader().leaves()) {
final Scorer scorer = weight.scorer(context);
if (scorer == null) {
continue;
}
final Bits liveDocs = context.reader().getLiveDocs();
for (int doc = scorer.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = scorer.nextDoc()) {
if (liveDocs == null || liveDocs.get(doc)) {
return true;
}
}
} catch (EarlyTerminationException e) {
// early termination
return true;
}
return false;
}
/**
 * Performs an exists (count &gt; 0) check on the searcher taken from the
 * <code>searchContext</code> for <code>query</code>.
 *
 * The <code>collector</code> can be instantiated using
 * <code>Lucene.createExistsCollector()</code>; it is reset before use so it
 * may be reused across calls.
 */
public static boolean exists(SearchContext searchContext, Query query, EarlyTerminatingCollector collector) throws IOException {
    collector.reset(); // discard state from any previous run
    try {
        searchContext.searcher().search(query, collector);
    } catch (EarlyTerminationException e) {
        // expected: the collector aborts the search once it has seen enough hits
    } finally {
        // always release per-collection resources held by the search context
        searchContext.clearReleasables(SearchContext.Lifetime.COLLECTION);
    }
    return collector.exists();
}
/**
 * Creates an {@link org.elasticsearch.common.lucene.Lucene.EarlyTerminatingCollector}
 * with a threshold of <code>1</code>, suitable for exists (count &gt; 0) checks.
 */
// fix: canonical modifier order is "public static final" (JLS 8.4.3),
// not "public final static"
public static final EarlyTerminatingCollector createExistsCollector() {
    return createCountBasedEarlyTerminatingCollector(1);
}
/**
 * Closes the given index writer, swallowing any failure.
 *
 * @param writer the writer to close; <tt>null</tt> is treated as already closed
 * @return <tt>true</tt> if the writer was <tt>null</tt> or closed cleanly,
 *         <tt>false</tt> if closing threw
 */
public static boolean safeClose(IndexWriter writer) {
    if (writer != null) {
        try {
            writer.close();
        } catch (Throwable t) {
            // deliberately best-effort: report failure instead of propagating
            return false;
        }
    }
    return true;
}
public static TopDocs readTopDocs(StreamInput in) throws IOException {
if (in.readBoolean()) {
int totalHits = in.readVInt();
@ -612,19 +516,11 @@ public class Lucene {
private int count = 0;
private LeafCollector leafCollector;
/**
 * Creates a collector that terminates after <code>maxCountHits</code> hits,
 * delegating collection to a no-op collector (only the hit count matters).
 */
EarlyTerminatingCollector(int maxCountHits) {
    this.maxCountHits = maxCountHits;
    this.delegate = NOOP_COLLECTOR;
}
EarlyTerminatingCollector(final Collector delegate, int maxCountHits) {
this.maxCountHits = maxCountHits;
this.delegate = (delegate == null) ? NOOP_COLLECTOR : delegate;
this.delegate = Objects.requireNonNull(delegate);
}
/**
 * Clears the hit count so this collector instance can be reused
 * for another search.
 */
public void reset() {
    count = 0;
}
/**
 * Returns the number of hits collected so far.
 */
public int count() {
    return count;
}

View File

@ -1,51 +0,0 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.common.lucene.search;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.SimpleCollector;
import java.io.IOException;
/**
 * A {@link SimpleCollector} that ignores every document it is offered.
 * Useful when only the side effects of running a search matter, not the
 * matching documents themselves.
 */
public class NoopCollector extends SimpleCollector {
    // Stateless, so a single shared instance is safe to reuse across searches.
    public static final NoopCollector NOOP_COLLECTOR = new NoopCollector();
    @Override
    public void setScorer(Scorer scorer) throws IOException {
        // no-op: scores are never read
    }
    @Override
    public void collect(int doc) throws IOException {
        // no-op: matching documents are deliberately discarded
    }
    @Override
    protected void doSetNextReader(LeafReaderContext context) throws IOException {
        // no-op: no per-segment state to maintain
    }
    @Override
    public boolean needsScores() {
        // scores are unused, so let the search skip computing them
        return false;
    }
}

View File

@ -25,6 +25,7 @@ import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.memory.ExtendedMemoryIndex;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
@ -457,22 +458,22 @@ public class PercolatorService extends AbstractComponent {
@Override
public PercolateShardResponse doPercolate(PercolateShardRequest request, PercolateContext context, boolean isNested) {
long count = 0;
Lucene.EarlyTerminatingCollector collector = Lucene.createExistsCollector();
for (Map.Entry<BytesRef, Query> entry : context.percolateQueries().entrySet()) {
try {
Query existsQuery = entry.getValue();
if (isNested) {
Lucene.exists(context.docSearcher(), entry.getValue(), Queries.newNonNestedFilter(), collector);
} else {
Lucene.exists(context.docSearcher(), entry.getValue(), collector);
existsQuery = new BooleanQuery.Builder()
.add(existsQuery, Occur.MUST)
.add(Queries.newNonNestedFilter(), Occur.FILTER)
.build();
}
if (Lucene.exists(context.docSearcher(), existsQuery)) {
count ++;
}
} catch (Throwable e) {
logger.debug("[" + entry.getKey() + "] failed to execute query", e);
throw new PercolateException(context.indexShard().shardId(), "failed to execute", e);
}
if (collector.exists()) {
count++;
}
}
return new PercolateShardResponse(count, context, request.shardId());
}
@ -552,7 +553,6 @@ public class PercolatorService extends AbstractComponent {
long count = 0;
List<BytesRef> matches = new ArrayList<>();
List<Map<String, HighlightField>> hls = new ArrayList<>();
Lucene.EarlyTerminatingCollector collector = Lucene.createExistsCollector();
for (Map.Entry<BytesRef, Query> entry : context.percolateQueries().entrySet()) {
if (context.highlight() != null) {
@ -560,26 +560,27 @@ public class PercolatorService extends AbstractComponent {
context.hitContext().cache().clear();
}
try {
Query existsQuery = entry.getValue();
if (isNested) {
Lucene.exists(context.docSearcher(), entry.getValue(), Queries.newNonNestedFilter(), collector);
} else {
Lucene.exists(context.docSearcher(), entry.getValue(), collector);
existsQuery = new BooleanQuery.Builder()
.add(existsQuery, Occur.MUST)
.add(Queries.newNonNestedFilter(), Occur.FILTER)
.build();
}
if (Lucene.exists(context.docSearcher(), existsQuery)) {
if (!context.limit || count < context.size()) {
matches.add(entry.getKey());
if (context.highlight() != null) {
highlightPhase.hitExecute(context, context.hitContext());
hls.add(context.hitContext().hit().getHighlightFields());
}
}
count++;
}
} catch (Throwable e) {
logger.debug("[" + entry.getKey() + "] failed to execute query", e);
throw new PercolateException(context.indexShard().shardId(), "failed to execute", e);
}
if (collector.exists()) {
if (!context.limit || count < context.size()) {
matches.add(entry.getKey());
if (context.highlight() != null) {
highlightPhase.hitExecute(context, context.hitContext());
hls.add(context.hitContext().hit().getHighlightFields());
}
}
count++;
}
}
BytesRef[] finalMatches = matches.toArray(new BytesRef[matches.size()]);

View File

@ -19,8 +19,10 @@
package org.elasticsearch.percolator;
import com.carrotsearch.hppc.FloatArrayList;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.*;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.logging.ESLogger;
import org.elasticsearch.common.lucene.Lucene;
@ -54,7 +56,6 @@ abstract class QueryCollector extends SimpleCollector {
final ESLogger logger;
boolean isNestedDoc = false;
final Lucene.EarlyTerminatingCollector collector = Lucene.createExistsCollector();
BytesRef current;
SortedBinaryDocValues values;
@ -166,6 +167,13 @@ abstract class QueryCollector extends SimpleCollector {
// log???
return;
}
Query existsQuery = query;
if (isNestedDoc) {
existsQuery = new BooleanQuery.Builder()
.add(existsQuery, Occur.MUST)
.add(Queries.newNonNestedFilter(), Occur.FILTER)
.build();
}
// run the query
try {
if (context.highlight() != null) {
@ -173,12 +181,7 @@ abstract class QueryCollector extends SimpleCollector {
context.hitContext().cache().clear();
}
if (isNestedDoc) {
Lucene.exists(searcher, query, Queries.newNonNestedFilter(), collector);
} else {
Lucene.exists(searcher, query, collector);
}
if (collector.exists()) {
if (Lucene.exists(searcher, existsQuery)) {
if (!limit || counter < size) {
matches.add(BytesRef.deepCopyOf(current));
if (context.highlight() != null) {
@ -230,14 +233,16 @@ abstract class QueryCollector extends SimpleCollector {
// log???
return;
}
Query existsQuery = query;
if (isNestedDoc) {
existsQuery = new BooleanQuery.Builder()
.add(existsQuery, Occur.MUST)
.add(Queries.newNonNestedFilter(), Occur.FILTER)
.build();
}
// run the query
try {
if (isNestedDoc) {
Lucene.exists(searcher, query, Queries.newNonNestedFilter(), collector);
} else {
Lucene.exists(searcher, query, collector);
}
if (collector.exists()) {
if (Lucene.exists(searcher, existsQuery)) {
topDocsLeafCollector.collect(doc);
postMatch(doc);
}
@ -298,18 +303,20 @@ abstract class QueryCollector extends SimpleCollector {
// log???
return;
}
Query existsQuery = query;
if (isNestedDoc) {
existsQuery = new BooleanQuery.Builder()
.add(existsQuery, Occur.MUST)
.add(Queries.newNonNestedFilter(), Occur.FILTER)
.build();
}
// run the query
try {
if (context.highlight() != null) {
context.parsedQuery(new ParsedQuery(query));
context.hitContext().cache().clear();
}
if (isNestedDoc) {
Lucene.exists(searcher, query, Queries.newNonNestedFilter(), collector);
} else {
Lucene.exists(searcher, query, collector);
}
if (collector.exists()) {
if (Lucene.exists(searcher, existsQuery)) {
if (!limit || counter < size) {
matches.add(BytesRef.deepCopyOf(current));
scores.add(scorer.score());
@ -363,14 +370,16 @@ abstract class QueryCollector extends SimpleCollector {
// log???
return;
}
Query existsQuery = query;
if (isNestedDoc) {
existsQuery = new BooleanQuery.Builder()
.add(existsQuery, Occur.MUST)
.add(Queries.newNonNestedFilter(), Occur.FILTER)
.build();
}
// run the query
try {
if (isNestedDoc) {
Lucene.exists(searcher, query, Queries.newNonNestedFilter(), collector);
} else {
Lucene.exists(searcher, query, collector);
}
if (collector.exists()) {
if (Lucene.exists(searcher, existsQuery)) {
counter++;
postMatch(doc);
}

View File

@ -104,7 +104,6 @@ public final class PhraseSuggester extends Suggester<PhraseSuggestionContext> {
response.addTerm(resultEntry);
final BytesRefBuilder byteSpare = new BytesRefBuilder();
final EarlyTerminatingCollector collector = Lucene.createExistsCollector();
final CompiledScript collateScript = suggestion.getCollateQueryScript();
final boolean collatePrune = (collateScript != null) && suggestion.collatePrune();
for (int i = 0; i < checkerResult.corrections.length; i++) {
@ -119,7 +118,7 @@ public final class PhraseSuggester extends Suggester<PhraseSuggestionContext> {
final ExecutableScript executable = scriptService.executable(collateScript, vars);
final BytesReference querySource = (BytesReference) executable.run();
final ParsedQuery parsedQuery = suggestion.getQueryParserService().parse(querySource);
collateMatch = Lucene.exists(searcher, parsedQuery.query(), collector);
collateMatch = Lucene.exists(searcher, parsedQuery.query());
}
if (!collateMatch && !collatePrune) {
continue;

View File

@ -20,10 +20,14 @@ package org.elasticsearch.common.lucene;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.*;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.util.Version;
import org.elasticsearch.test.ESTestCase;
@ -322,4 +326,37 @@ public class LuceneTests extends ESTestCase {
writer.close();
dir.close();
}
// NOTE(review): despite its name, this test exercises Lucene.exists —
// consider renaming to testExists in a follow-up.
public void testCount() throws Exception {
    Directory directory = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(getRandom(), directory);
    // An empty index matches nothing, not even match_all.
    try (DirectoryReader reader = writer.getReader()) {
        IndexSearcher searcher = newSearcher(reader);
        assertFalse(Lucene.exists(searcher, new MatchAllDocsQuery()));
    }
    // Index one field-less document, then the same Document with foo=bar added.
    Document document = new Document();
    writer.addDocument(document);
    document.add(new StringField("foo", "bar", Store.NO));
    writer.addDocument(document);
    try (DirectoryReader reader = writer.getReader()) {
        IndexSearcher searcher = newSearcher(reader);
        assertTrue(Lucene.exists(searcher, new MatchAllDocsQuery()));
        assertFalse(Lucene.exists(searcher, new TermQuery(new Term("baz", "bar"))));
        assertTrue(Lucene.exists(searcher, new TermQuery(new Term("foo", "bar"))));
    }
    // Once the only foo=bar document is deleted, exists must report false again.
    writer.deleteDocuments(new Term("foo", "bar"));
    try (DirectoryReader reader = writer.getReader()) {
        IndexSearcher searcher = newSearcher(reader);
        assertFalse(Lucene.exists(searcher, new TermQuery(new Term("foo", "bar"))));
    }
    writer.close();
    directory.close();
}
}

View File

@ -44,23 +44,23 @@ public class MultiPhrasePrefixQueryTests extends ESTestCase {
MultiPhrasePrefixQuery query = new MultiPhrasePrefixQuery();
query.add(new Term("field", "aa"));
assertThat(Lucene.count(searcher, query), equalTo(1l));
assertThat(searcher.count(query), equalTo(1));
query = new MultiPhrasePrefixQuery();
query.add(new Term("field", "aaa"));
query.add(new Term("field", "bb"));
assertThat(Lucene.count(searcher, query), equalTo(1l));
assertThat(searcher.count(query), equalTo(1));
query = new MultiPhrasePrefixQuery();
query.setSlop(1);
query.add(new Term("field", "aaa"));
query.add(new Term("field", "cc"));
assertThat(Lucene.count(searcher, query), equalTo(1l));
assertThat(searcher.count(query), equalTo(1));
query = new MultiPhrasePrefixQuery();
query.setSlop(1);
query.add(new Term("field", "xxx"));
assertThat(Lucene.count(searcher, query), equalTo(0l));
assertThat(searcher.count(query), equalTo(0));
}
@Test

View File

@ -65,7 +65,7 @@ public class MoreLikeThisQueryTests extends ESTestCase {
mltQuery.setLikeText("lucene");
mltQuery.setMinTermFrequency(1);
mltQuery.setMinDocFreq(1);
long count = Lucene.count(searcher, mltQuery);
long count = searcher.count(mltQuery);
assertThat(count, equalTo(2l));
reader.close();

View File

@ -20,7 +20,6 @@
package org.elasticsearch.index.engine;
import org.apache.lucene.search.Query;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.lucene.search.Queries;
import org.hamcrest.Description;
import org.hamcrest.Matcher;
@ -46,7 +45,7 @@ public final class EngineSearcherTotalHitsMatcher extends TypeSafeMatcher<Engine
@Override
public boolean matchesSafely(Engine.Searcher searcher) {
try {
this.count = (int) Lucene.count(searcher.searcher(), query);
this.count = (int) searcher.searcher().count(query);
return count == totalHits;
} catch (IOException e) {
return false;