Aggs fix: the background count for docs should include deleted docs; otherwise a term's docFreq (which includes deleted docs) can exceed the number of docs reported for the index and cause an exception.

The randomisation that deletes documents is also removed from the tests, because with this doc-accounting change the specific scores expected in the tests would be subject to random variability and the tests would fail.

Closes #7951
This commit is contained in:
parent f0052a58d6
commit f878f40ae5
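The mechanism behind the exception is easy to reproduce directly against Lucene. The sketch below is illustrative only and not part of this commit; it assumes the Lucene 4.x API in use at the time, and the class, field, and term names are invented. After a delete, docFreq still counts the deleted document while numDocs() does not, so a term can appear to occur in more documents than the index reports; maxDoc() is the count that stays consistent with docFreq.

import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Version;

public class DocFreqVsNumDocs {
    public static void main(String[] args) throws Exception {
        RAMDirectory dir = new RAMDirectory();
        IndexWriter writer = new IndexWriter(dir,
                new IndexWriterConfig(Version.LUCENE_4_10_0, new KeywordAnalyzer())); // assuming Lucene 4.10.x
        // Index three docs that all contain the term "constant".
        for (int i = 0; i < 3; i++) {
            Document doc = new Document();
            doc.add(new StringField("id", Integer.toString(i), Field.Store.NO));
            doc.add(new StringField("text", "constant", Field.Store.NO));
            writer.addDocument(doc);
        }
        writer.commit();
        // Delete one doc, leaving a "hole" until segments merge.
        writer.deleteDocuments(new Term("id", "0"));
        writer.commit();

        DirectoryReader reader = DirectoryReader.open(dir);
        TermsEnum termsEnum = MultiFields.getTerms(reader, "text").iterator(null);
        termsEnum.seekExact(new BytesRef("constant"));
        // docFreq still counts the deleted doc; numDocs does not.
        System.out.println("docFreq = " + termsEnum.docFreq()); // 3
        System.out.println("numDocs = " + reader.numDocs());    // 2
        System.out.println("maxDoc  = " + reader.maxDoc());     // 3
        reader.close();
        writer.close();
    }
}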
@@ -71,7 +71,9 @@ public class FilterableTermsEnum extends TermsEnum {
         }
         this.docsEnumFlag = docsEnumFlag;
         if (filter == null) {
-            numDocs = reader.numDocs();
+            // Important - need to use the doc count that includes deleted docs
+            // or we have this issue: https://github.com/elasticsearch/elasticsearch/issues/7951
+            numDocs = reader.maxDoc();
         }
         ApplyAcceptedDocsFilter acceptedDocsFilter = filter == null ? null : new ApplyAcceptedDocsFilter(filter);
         List<AtomicReaderContext> leaves = reader.leaves();
@@ -263,6 +263,49 @@ public class SignificantTermsSignificanceScoreTests extends ElasticsearchIntegra
     }
 
+    @Test
+    public void testDeletesIssue7951() throws Exception {
+        String settings = "{\"index.number_of_shards\": 1, \"index.number_of_replicas\": 0}";
+        String mappings = "{\"doc\": {\"properties\":{\"text\": {\"type\":\"string\",\"index\":\"not_analyzed\"}}}}";
+        assertAcked(prepareCreate(INDEX_NAME).setSettings(settings).addMapping("doc", mappings));
+        String[] cat1v1 = {"constant", "one"};
+        String[] cat1v2 = {"constant", "uno"};
+        String[] cat2v1 = {"constant", "two"};
+        String[] cat2v2 = {"constant", "duo"};
+        List<IndexRequestBuilder> indexRequestBuilderList = new ArrayList<>();
+        indexRequestBuilderList.add(client().prepareIndex(INDEX_NAME, DOC_TYPE, "1")
+                .setSource(TEXT_FIELD, cat1v1, CLASS_FIELD, "1"));
+        indexRequestBuilderList.add(client().prepareIndex(INDEX_NAME, DOC_TYPE, "2")
+                .setSource(TEXT_FIELD, cat1v2, CLASS_FIELD, "1"));
+        indexRequestBuilderList.add(client().prepareIndex(INDEX_NAME, DOC_TYPE, "3")
+                .setSource(TEXT_FIELD, cat2v1, CLASS_FIELD, "2"));
+        indexRequestBuilderList.add(client().prepareIndex(INDEX_NAME, DOC_TYPE, "4")
+                .setSource(TEXT_FIELD, cat2v2, CLASS_FIELD, "2"));
+        indexRandom(true, false, indexRequestBuilderList);
+
+        // Now create some holes in the index with selective deletes caused by updates.
+        // This is the scenario that caused this issue: https://github.com/elasticsearch/elasticsearch/issues/7951
+        // Scoring algorithms throw exceptions if term docFreqs exceed the reported size of the index
+        // from which they are taken, so we need to make sure this doesn't happen.
+        String[] text = cat1v1;
+        indexRequestBuilderList.clear();
+        for (int i = 0; i < 50; i++) {
+            text = text == cat1v2 ? cat1v1 : cat1v2;
+            indexRequestBuilderList.add(client().prepareIndex(INDEX_NAME, DOC_TYPE, "1").setSource(TEXT_FIELD, text, CLASS_FIELD, "1"));
+        }
+        indexRandom(true, false, indexRequestBuilderList);
+
+        SearchResponse response1 = client().prepareSearch(INDEX_NAME).setTypes(DOC_TYPE)
+                .addAggregation(new TermsBuilder("class")
+                        .field(CLASS_FIELD)
+                        .subAggregation(
+                                new SignificantTermsBuilder("sig_terms")
+                                        .field(TEXT_FIELD)
+                                        .minDocCount(1)))
+                .execute()
+                .actionGet();
+    }
+
     @Test
     public void testBackgroundVsSeparateSet() throws Exception {
         String type = randomBoolean() ? "string" : "long";
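For context on the exception the test comments above refer to: before scoring, the significance heuristics validate that a term's document frequency does not exceed the size of the set it was counted against. The following is an illustrative paraphrase of that guard, not the actual Elasticsearch source; the method and parameter names are assumptions.

// Illustrative paraphrase of the frequency guard in the significance
// heuristics (names assumed, not the actual Elasticsearch source).
static void checkFrequencyValidity(long subsetFreq, long subsetSize,
                                   long supersetFreq, long supersetSize) {
    if (subsetFreq > subsetSize || supersetFreq > supersetSize) {
        // Before this fix, supersetSize came from reader.numDocs() (live docs
        // only) while supersetFreq was a docFreq that still counted deleted
        // docs, so an index with deletes could trip this check spuriously.
        throw new IllegalArgumentException(
                "term frequency must not exceed the size of the set it was counted in");
    }
}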
@@ -347,7 +390,7 @@ public class SignificantTermsSignificanceScoreTests extends ElasticsearchIntegra
                 .setSource(TEXT_FIELD, gb, CLASS_FIELD, "0"));
         indexRequestBuilderList.add(client().prepareIndex(INDEX_NAME, DOC_TYPE, "7")
                 .setSource(TEXT_FIELD, "0", CLASS_FIELD, "0"));
-        indexRandom(true, indexRequestBuilderList);
+        indexRandom(true, false, indexRequestBuilderList);
     }
 
     @Test
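The indexRandom(true, ...) to indexRandom(true, false, ...) changes in this and the remaining hunks all serve the second point of the commit message. To my understanding (an assumption about the era's ElasticsearchIntegrationTest, not visible in this diff), the second boolean is a dummyDocuments flag: when true, the helper may randomly interleave throwaway documents and delete them once indexing finishes, deliberately leaving deleted docs in the index. Now that background counts include deleted docs, that randomisation would perturb the exact significance scores these tests assert, so it is switched off.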
@@ -413,6 +456,6 @@ public class SignificantTermsSignificanceScoreTests extends ElasticsearchIntegra
             indexRequestBuilders.add(client().prepareIndex("test", "doc", "" + i)
                     .setSource("class", parts[0], "text", parts[1]));
         }
-        indexRandom(true, indexRequestBuilders);
+        indexRandom(true, false, indexRequestBuilders);
     }
 }
@@ -69,7 +69,7 @@ public class TermsShardMinDocCountTests extends ElasticsearchIntegrationTest {
         addTermsDocs("5", 3, 1, indexBuilders); // low score but high doc freq
         addTermsDocs("6", 3, 1, indexBuilders);
         addTermsDocs("7", 0, 3, indexBuilders); // make sure the terms all get score > 0 except for this one
-        indexRandom(true, indexBuilders);
+        indexRandom(true, false, indexBuilders);
 
         // first, check that indeed when not setting the shardMinDocCount parameter, 0 terms are returned
         SearchResponse response = client().prepareSearch(index)
@@ -126,7 +126,7 @@ public class TermsShardMinDocCountTests extends ElasticsearchIntegrationTest {
         addTermsDocs("4", 1, indexBuilders);
         addTermsDocs("5", 3, indexBuilders); // low score but high doc freq
         addTermsDocs("6", 3, indexBuilders);
-        indexRandom(true, indexBuilders);
+        indexRandom(true, false, indexBuilders);
 
         // first, check that indeed when not setting the shardMinDocCount parameter, 0 terms are returned
         SearchResponse response = client().prepareSearch(index)