Early termination with index sorting should not set terminated_early in the response (#26597)

Early termination with index sorting always return the best top N in the response but set the flag `terminated_early`
in the response. This can be confusing because we use the same flag for `terminate_after` which on the contrary returns partial results.
This change removes the flag when results are not partial (early termination due to index sorting) and keeps it only when `terminate_after` is used.

Closes #26408
This commit is contained in:
Jim Ferenczi 2017-09-26 11:37:11 +02:00 committed by GitHub
parent 6189c54c84
commit 74473c1c3d
5 changed files with 48 additions and 59 deletions

View File

@ -217,13 +217,11 @@ abstract class QueryCollectorContext {
boolean trackTotalHits,
boolean shouldCollect) {
return new QueryCollectorContext(REASON_SEARCH_TERMINATE_AFTER_COUNT) {
private BooleanSupplier terminatedEarlySupplier;
private IntSupplier countSupplier = null;
@Override
Collector create(Collector in) throws IOException {
EarlyTerminatingSortingCollector sortingCollector = new EarlyTerminatingSortingCollector(in, indexSort, numHits);
terminatedEarlySupplier = sortingCollector::terminatedEarly;
Collector collector = sortingCollector;
if (trackTotalHits) {
int count = shouldCollect ? -1 : shortcutTotalHitCount(reader, query);
@ -240,9 +238,6 @@ abstract class QueryCollectorContext {
@Override
void postProcess(QuerySearchResult result, boolean hasCollected) throws IOException {
if (terminatedEarlySupplier.getAsBoolean()) {
result.terminatedEarly(true);
}
if (countSupplier != null) {
final TopDocs topDocs = result.topDocs();
topDocs.totalHits = countSupplier.getAsInt();

View File

@ -38,7 +38,10 @@ import org.apache.lucene.search.Collector;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.FilterCollector;
import org.apache.lucene.search.FilterLeafCollector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LeafCollector;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Query;
@ -64,10 +67,8 @@ import java.util.concurrent.atomic.AtomicBoolean;
import static org.hamcrest.Matchers.anyOf;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.greaterThan;
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
import static org.hamcrest.Matchers.instanceOf;
import static org.hamcrest.Matchers.lessThan;
public class QueryPhaseTests extends IndexShardTestCase {
@ -412,30 +413,19 @@ public class QueryPhaseTests extends IndexShardTestCase {
context.setTask(new SearchTask(123L, "", "", "", null));
context.sort(new SortAndFormats(sort, new DocValueFormat[] {DocValueFormat.RAW}));
final AtomicBoolean collected = new AtomicBoolean();
final IndexReader reader = DirectoryReader.open(dir);
IndexSearcher contextSearcher = new IndexSearcher(reader) {
protected void search(List<LeafReaderContext> leaves, Weight weight, Collector collector) throws IOException {
collected.set(true);
super.search(leaves, weight, collector);
}
};
IndexSearcher contextSearcher = new IndexSearcher(reader);
QueryPhase.execute(context, contextSearcher, checkCancelled -> {}, sort);
assertTrue(collected.get());
assertTrue(context.queryResult().terminatedEarly());
assertThat(context.queryResult().topDocs().totalHits, equalTo((long) numDocs));
assertThat(context.queryResult().topDocs().scoreDocs.length, equalTo(1));
assertThat(context.queryResult().topDocs().scoreDocs[0], instanceOf(FieldDoc.class));
FieldDoc fieldDoc = (FieldDoc) context.queryResult().topDocs().scoreDocs[0];
assertThat(fieldDoc.fields[0], equalTo(1));
{
collected.set(false);
context.parsedPostFilter(new ParsedQuery(new MinDocQuery(1)));
QueryPhase.execute(context, contextSearcher, checkCancelled -> {}, sort);
assertTrue(collected.get());
assertTrue(context.queryResult().terminatedEarly());
assertNull(context.queryResult().terminatedEarly());
assertThat(context.queryResult().topDocs().totalHits, equalTo(numDocs - 1L));
assertThat(context.queryResult().topDocs().scoreDocs.length, equalTo(1));
assertThat(context.queryResult().topDocs().scoreDocs[0], instanceOf(FieldDoc.class));
@ -444,10 +434,8 @@ public class QueryPhaseTests extends IndexShardTestCase {
final TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
context.queryCollectors().put(TotalHitCountCollector.class, totalHitCountCollector);
collected.set(false);
QueryPhase.execute(context, contextSearcher, checkCancelled -> {}, sort);
assertTrue(collected.get());
assertTrue(context.queryResult().terminatedEarly());
assertNull(context.queryResult().terminatedEarly());
assertThat(context.queryResult().topDocs().totalHits, equalTo((long) numDocs));
assertThat(context.queryResult().topDocs().scoreDocs.length, equalTo(1));
assertThat(context.queryResult().topDocs().scoreDocs[0], instanceOf(FieldDoc.class));
@ -457,27 +445,19 @@ public class QueryPhaseTests extends IndexShardTestCase {
}
{
collected.set(false);
contextSearcher = getAssertingEarlyTerminationSearcher(reader, 1);
context.trackTotalHits(false);
QueryPhase.execute(context, contextSearcher, checkCancelled -> {}, sort);
assertTrue(collected.get());
assertTrue(context.queryResult().terminatedEarly());
assertThat(context.queryResult().topDocs().totalHits, lessThan((long) numDocs));
assertNull(context.queryResult().terminatedEarly());
assertThat(context.queryResult().topDocs().scoreDocs.length, equalTo(1));
assertThat(context.queryResult().topDocs().scoreDocs[0], instanceOf(FieldDoc.class));
assertThat(fieldDoc.fields[0], anyOf(equalTo(1), equalTo(2)));
final TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
context.queryCollectors().put(TotalHitCountCollector.class, totalHitCountCollector);
collected.set(false);
QueryPhase.execute(context, contextSearcher, checkCancelled -> {}, sort);
assertTrue(collected.get());
assertTrue(context.queryResult().terminatedEarly());
assertThat(context.queryResult().topDocs().totalHits, lessThan((long) numDocs));
assertNull(context.queryResult().terminatedEarly());
assertThat(context.queryResult().topDocs().scoreDocs.length, equalTo(1));
assertThat(context.queryResult().topDocs().scoreDocs[0], instanceOf(FieldDoc.class));
assertThat(fieldDoc.fields[0], anyOf(equalTo(1), equalTo(2)));
assertThat(totalHitCountCollector.getTotalHits(), equalTo(numDocs));
}
reader.close();
dir.close();
@ -498,8 +478,9 @@ public class QueryPhaseTests extends IndexShardTestCase {
doc.add(new NumericDocValuesField("tiebreaker", i));
w.addDocument(doc);
}
// Make sure that we can early terminate queries on this index
w.forceMerge(3);
if (randomBoolean()) {
w.forceMerge(randomIntBetween(1, 10));
}
w.close();
TestSearchContext context = new TestSearchContext(null, indexShard);
@ -513,28 +494,21 @@ public class QueryPhaseTests extends IndexShardTestCase {
context.setSize(10);
context.sort(new SortAndFormats(sort, new DocValueFormat[] {DocValueFormat.RAW, DocValueFormat.RAW}));
final AtomicBoolean collected = new AtomicBoolean();
final IndexReader reader = DirectoryReader.open(dir);
IndexSearcher contextSearcher = new IndexSearcher(reader) {
protected void search(List<LeafReaderContext> leaves, Weight weight, Collector collector) throws IOException {
collected.set(true);
super.search(leaves, weight, collector);
}
};
IndexSearcher contextSearcher = new IndexSearcher(reader);
QueryPhase.execute(context, contextSearcher, checkCancelled -> {}, sort);
assertThat(context.queryResult().topDocs().totalHits, equalTo((long) numDocs));
assertTrue(collected.get());
assertNull(context.queryResult().terminatedEarly());
assertThat(context.terminateAfter(), equalTo(0));
assertThat(context.queryResult().getTotalHits(), equalTo((long) numDocs));
int sizeMinus1 = context.queryResult().topDocs().scoreDocs.length - 1;
FieldDoc lastDoc = (FieldDoc) context.queryResult().topDocs().scoreDocs[sizeMinus1];
contextSearcher = getAssertingEarlyTerminationSearcher(reader, 10);
QueryPhase.execute(context, contextSearcher, checkCancelled -> {}, sort);
assertNull(context.queryResult().terminatedEarly());
assertThat(context.queryResult().topDocs().totalHits, equalTo((long) numDocs));
assertTrue(collected.get());
assertTrue(context.queryResult().terminatedEarly());
assertThat(context.terminateAfter(), equalTo(0));
assertThat(context.queryResult().getTotalHits(), equalTo((long) numDocs));
FieldDoc firstDoc = (FieldDoc) context.queryResult().topDocs().scoreDocs[0];
@ -551,4 +525,37 @@ public class QueryPhaseTests extends IndexShardTestCase {
reader.close();
dir.close();
}
static IndexSearcher getAssertingEarlyTerminationSearcher(IndexReader reader, int size) {
return new IndexSearcher(reader) {
protected void search(List<LeafReaderContext> leaves, Weight weight, Collector collector) throws IOException {
final Collector in = new AssertingEalyTerminationFilterCollector(collector, size);
super.search(leaves, weight, in);
}
};
}
private static class AssertingEalyTerminationFilterCollector extends FilterCollector {
private final int size;
AssertingEalyTerminationFilterCollector(Collector in, int size) {
super(in);
this.size = size;
}
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
final LeafCollector in = super.getLeafCollector(context);
return new FilterLeafCollector(in) {
int collected;
@Override
public void collect(int doc) throws IOException {
assert collected <= size : "should not collect more than " + size + " doc per segment, got " + collected;
++ collected;
super.collect(doc);
}
};
}
}
}

View File

@ -313,7 +313,6 @@ public class SimpleSearchIT extends ESIntegTestCase {
refresh();
SearchResponse searchResponse;
boolean hasEarlyTerminated = false;
for (int i = 1; i < max; i++) {
searchResponse = client().prepareSearch("test")
.addDocValueField("rank")
@ -321,16 +320,11 @@ public class SimpleSearchIT extends ESIntegTestCase {
.addSort("rank", SortOrder.ASC)
.setSize(i).execute().actionGet();
assertThat(searchResponse.getHits().getTotalHits(), equalTo(-1L));
if (searchResponse.isTerminatedEarly() != null) {
assertTrue(searchResponse.isTerminatedEarly());
hasEarlyTerminated = true;
}
for (int j = 0; j < i; j++) {
assertThat(searchResponse.getHits().getAt(j).field("rank").getValue(),
equalTo((long) j));
}
}
assertTrue(hasEarlyTerminated);
}
public void testInsaneFromAndSize() throws Exception {

View File

@ -196,16 +196,13 @@ as soon as N documents have been collected per segment.
"hits" : []
},
"took": 20,
"terminated_early": true, <2>
"timed_out": false
}
--------------------------------------------------
// TESTRESPONSE[s/"_shards": \.\.\./"_shards": "$body._shards",/]
// TESTRESPONSE[s/"took": 20,/"took": "$body.took",/]
// TESTRESPONSE[s/"terminated_early": true,//]
<1> The total number of hits matching the query is unknown because of early termination.
<2> Indicates whether the top docs retrieval has actually terminated_early.
NOTE: Aggregations will collect all documents that match the query regardless of the value of `track_total_hits`

View File

@ -98,7 +98,6 @@
sort: ["rank"]
size: 1
- is_true: terminated_early
- match: {hits.total: 8 }
- length: {hits.hits: 1 }
- match: {hits.hits.0._id: "2" }
@ -113,7 +112,6 @@
track_total_hits: false
size: 1
- match: {terminated_early: true}
- match: {hits.total: -1 }
- length: {hits.hits: 1 }
- match: {hits.hits.0._id: "2" }
@ -134,7 +132,6 @@
body:
sort: _doc
- is_false: terminated_early
- match: {hits.total: 8 }
- length: {hits.hits: 8 }
- match: {hits.hits.0._id: "2" }
@ -156,7 +153,6 @@
track_total_hits: false
size: 3
- match: {terminated_early: true }
- match: {hits.total: -1 }
- length: {hits.hits: 3 }
- match: {hits.hits.0._id: "2" }