mirror of
https://github.com/honeymoose/OpenSearch.git
synced 2025-02-17 10:25:15 +00:00
Return correct term statistics when a field is not found in a shard (#21922)
If you request the term vectors of an artificial document with `term_statistics=true`, but a shard has no terms for the document's field(s), the shard returns the document's own term vector values as if they were the shard-level term statistics. This commit fixes that: in this case `ttf` and the field-level aggregated statistics are now returned as 0. Closes #21906
This commit is contained in:
parent
adf9bd90a4
commit
8fd3637891
@ -71,7 +71,7 @@ final class TermVectorsWriter {
|
||||
|
||||
// if no terms found, fall back to empty terms so that the reported stats are 0
|
||||
if (topLevelTerms == null) {
|
||||
topLevelTerms = fieldTermVector;
|
||||
topLevelTerms = EMPTY_TERMS;
|
||||
}
|
||||
|
||||
TermsEnum topLevelIterator = topLevelTerms.iterator();
|
||||
@ -292,4 +292,18 @@ final class TermVectorsWriter {
|
||||
// further...
|
||||
output.writeVLong(Math.max(0, value + 1));
|
||||
}
|
||||
|
||||
/**
 * An empty {@link Terms}: its iterator is {@link TermsEnum#EMPTY}, every statistic
 * (size, sumTotalTermFreq, sumDocFreq, docCount) is 0, and it reports no freqs,
 * offsets, positions or payloads. Used as the top-level terms when none are found,
 * so that term statistics are reported as 0.
 */
|
||||
private static final Terms EMPTY_TERMS = new Terms() {
|
||||
@Override public TermsEnum iterator() throws IOException { return TermsEnum.EMPTY; }
|
||||
@Override public long size() throws IOException { return 0; }
|
||||
@Override public long getSumTotalTermFreq() throws IOException { return 0; }
|
||||
@Override public long getSumDocFreq() throws IOException { return 0; }
|
||||
@Override public int getDocCount() throws IOException { return 0; }
|
||||
@Override public boolean hasFreqs() { return false; }
|
||||
@Override public boolean hasOffsets() { return false; }
|
||||
@Override public boolean hasPositions() { return false; }
|
||||
@Override public boolean hasPayloads() { return false; }
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -848,6 +848,16 @@ public class GetTermVectorsIT extends AbstractTermVectorsTestCase {
|
||||
.get();
|
||||
assertThat(resp.isExists(), equalTo(true));
|
||||
checkBrownFoxTermVector(resp.getFields(), "field1", false);
|
||||
|
||||
// Since the index is empty, all of the artificial document's "term_statistics" should be 0/absent
|
||||
Terms terms = resp.getFields().terms("field1");
|
||||
assertEquals("sumDocFreq should be 0 for a non-existing field!", 0, terms.getSumDocFreq());
|
||||
assertEquals("sumTotalTermFreq should be 0 for a non-existing field!", 0, terms.getSumTotalTermFreq());
|
||||
TermsEnum termsEnum = terms.iterator(); // we're guaranteed to receive terms for that field
|
||||
while (termsEnum.next() != null) {
|
||||
String term = termsEnum.term().utf8ToString();
|
||||
assertEquals("term [" + term + "] does not exist in the index; ttf should be 0!", 0, termsEnum.totalTermFreq());
|
||||
}
|
||||
}
|
||||
|
||||
public void testPerFieldAnalyzer() throws IOException {
|
||||
|
Loading…
x
Reference in New Issue
Block a user