From 8fd363789128ab4db93258d6eaf34e40b9197018 Mon Sep 17 00:00:00 2001 From: shaie Date: Fri, 2 Dec 2016 09:14:45 +0200 Subject: [PATCH] Return correct term statistics when a field is not found in a shard (#21922) If you ask for the term vectors of an artificial document with term_statistics=true, but a shard does not have any terms for the document's field(s), the shard returns the document's own term vector values as its shard-level term statistics. This commit fixes that so the shard reports 0 for `ttf` and for the field-level aggregated statistics. Closes #21906 --- .../action/termvectors/TermVectorsWriter.java | 16 +++++++++++++++- .../action/termvectors/GetTermVectorsIT.java | 10 ++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/core/src/main/java/org/elasticsearch/action/termvectors/TermVectorsWriter.java b/core/src/main/java/org/elasticsearch/action/termvectors/TermVectorsWriter.java index 6b5e497b8e5..06eea6367ed 100644 --- a/core/src/main/java/org/elasticsearch/action/termvectors/TermVectorsWriter.java +++ b/core/src/main/java/org/elasticsearch/action/termvectors/TermVectorsWriter.java @@ -71,7 +71,7 @@ final class TermVectorsWriter { // if no terms found, take the retrieved term vector fields for stats if (topLevelTerms == null) { - topLevelTerms = fieldTermVector; + topLevelTerms = EMPTY_TERMS; } TermsEnum topLevelIterator = topLevelTerms.iterator(); @@ -292,4 +292,18 @@ final class TermVectorsWriter { // further... output.writeVLong(Math.max(0, value + 1)); } + + /** Implements an empty {@link Terms}. 
*/ + private static final Terms EMPTY_TERMS = new Terms() { + @Override public TermsEnum iterator() throws IOException { return TermsEnum.EMPTY; } + @Override public long size() throws IOException { return 0; } + @Override public long getSumTotalTermFreq() throws IOException { return 0; } + @Override public long getSumDocFreq() throws IOException { return 0; } + @Override public int getDocCount() throws IOException { return 0; } + @Override public boolean hasFreqs() { return false; } + @Override public boolean hasOffsets() { return false; } + @Override public boolean hasPositions() { return false; } + @Override public boolean hasPayloads() { return false; } + }; + } diff --git a/core/src/test/java/org/elasticsearch/action/termvectors/GetTermVectorsIT.java b/core/src/test/java/org/elasticsearch/action/termvectors/GetTermVectorsIT.java index 3835edbbe9a..765e39b3fbf 100644 --- a/core/src/test/java/org/elasticsearch/action/termvectors/GetTermVectorsIT.java +++ b/core/src/test/java/org/elasticsearch/action/termvectors/GetTermVectorsIT.java @@ -848,6 +848,16 @@ public class GetTermVectorsIT extends AbstractTermVectorsTestCase { .get(); assertThat(resp.isExists(), equalTo(true)); checkBrownFoxTermVector(resp.getFields(), "field1", false); + + // Since the index is empty, all of the artificial document's "term_statistics" should be 0/absent + Terms terms = resp.getFields().terms("field1"); + assertEquals("sumDocFreq should be 0 for a non-existing field!", 0, terms.getSumDocFreq()); + assertEquals("sumTotalTermFreq should be 0 for a non-existing field!", 0, terms.getSumTotalTermFreq()); + TermsEnum termsEnum = terms.iterator(); // we're guaranteed to receive terms for that field + while (termsEnum.next() != null) { + String term = termsEnum.term().utf8ToString(); + assertEquals("term [" + term + "] does not exist in the index; ttf should be 0!", 0, termsEnum.totalTermFreq()); + } } public void testPerFieldAnalyzer() throws IOException {