Return correct term statistics when a field is not found in a shard (#21922)

If you ask for the term vectors of an artificial document with
term_statistics=true, but a shard does not have any terms for the doc's
field(s), it returns the doc's own term vector values as the shard-level
term statistics. This commit fixes that to return 0 for `ttf` and for the
field-level aggregated statistics.

Closes #21906
This commit is contained in:
shaie 2016-12-02 09:14:45 +02:00 committed by Simon Willnauer
parent adf9bd90a4
commit 8fd3637891
2 changed files with 25 additions and 1 deletions

View File

@ -71,7 +71,7 @@ final class TermVectorsWriter {
// if no terms found, use empty terms so that the reported statistics are 0
if (topLevelTerms == null) {
topLevelTerms = fieldTermVector;
topLevelTerms = EMPTY_TERMS;
}
TermsEnum topLevelIterator = topLevelTerms.iterator();
@ -292,4 +292,18 @@ final class TermVectorsWriter {
// further...
output.writeVLong(Math.max(0, value + 1));
}
/**
 * A {@link Terms} that holds no terms at all: iterating it yields {@link TermsEnum#EMPTY},
 * every count and aggregated statistic is 0, and it reports no frequencies, offsets,
 * positions or payloads.
 */
private static final Terms EMPTY_TERMS = new Terms() {

    @Override
    public TermsEnum iterator() throws IOException {
        return TermsEnum.EMPTY;
    }

    @Override
    public long size() throws IOException {
        return 0L;
    }

    @Override
    public long getSumTotalTermFreq() throws IOException {
        return 0L;
    }

    @Override
    public long getSumDocFreq() throws IOException {
        return 0L;
    }

    @Override
    public int getDocCount() throws IOException {
        return 0;
    }

    @Override
    public boolean hasFreqs() {
        return false;
    }

    @Override
    public boolean hasOffsets() {
        return false;
    }

    @Override
    public boolean hasPositions() {
        return false;
    }

    @Override
    public boolean hasPayloads() {
        return false;
    }
};
}

View File

@ -848,6 +848,16 @@ public class GetTermVectorsIT extends AbstractTermVectorsTestCase {
.get();
assertThat(resp.isExists(), equalTo(true));
checkBrownFoxTermVector(resp.getFields(), "field1", false);
// Since the index is empty, all of artificial document's "term_statistics" should be 0/absent
Terms terms = resp.getFields().terms("field1");
assertEquals("sumDocFreq should be 0 for a non-existing field!", 0, terms.getSumDocFreq());
assertEquals("sumTotalTermFreq should be 0 for a non-existing field!", 0, terms.getSumTotalTermFreq());
TermsEnum termsEnum = terms.iterator(); // we're guaranteed to receive terms for that field
while (termsEnum.next() != null) {
String term = termsEnum.term().utf8ToString();
assertEquals("term [" + term + "] does not exist in the index; ttf should be 0!", 0, termsEnum.totalTermFreq());
}
}
public void testPerFieldAnalyzer() throws IOException {