Don't report terms as live if all its docs are filtered out
FilterableTermsEnum allows filtering stats by supplying per-segment bits. Today, if all docs are filtered out, the term is still reported as live, but it shouldn't be. Relates to #6211
This commit is contained in:
parent
c593234b7c
commit
72da764261
|
@ -124,48 +124,50 @@ public class FilterableTermsEnum extends TermsEnum {
|
|||
|
||||
@Override
|
||||
public boolean seekExact(BytesRef text) throws IOException {
|
||||
boolean found = false;
|
||||
currentDocFreq = NOT_FOUND;
|
||||
currentTotalTermFreq = NOT_FOUND;
|
||||
int docFreq = 0;
|
||||
long totalTermFreq = 0;
|
||||
for (Holder anEnum : enums) {
|
||||
if (!anEnum.termsEnum.seekExact(text)) {
|
||||
continue;
|
||||
}
|
||||
found = true;
|
||||
if (anEnum.bits == null) {
|
||||
docFreq += anEnum.termsEnum.docFreq();
|
||||
if (docsEnumFlag == DocsEnum.FLAG_FREQS) {
|
||||
long leafTotalTermFreq = anEnum.termsEnum.totalTermFreq();
|
||||
if (totalTermFreq == -1 || leafTotalTermFreq == -1) {
|
||||
totalTermFreq = -1;
|
||||
continue;
|
||||
}
|
||||
totalTermFreq += leafTotalTermFreq;
|
||||
}
|
||||
} else {
|
||||
DocsEnum docsEnum = anEnum.docsEnum = anEnum.termsEnum.docs(anEnum.bits, anEnum.docsEnum, docsEnumFlag);
|
||||
// 2 choices for performing same heavy loop - one attempts to calculate totalTermFreq and other does not
|
||||
if (docsEnumFlag == DocsEnum.FLAG_FREQS) {
|
||||
for (int docId = docsEnum.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = docsEnum.nextDoc()) {
|
||||
docFreq++;
|
||||
// docsEnum.freq() returns 1 if doc indexed with IndexOptions.DOCS_ONLY so no way of knowing if value
|
||||
// is really 1 or unrecorded when filtering like this
|
||||
totalTermFreq += docsEnum.freq();
|
||||
if (anEnum.termsEnum.seekExact(text)) {
|
||||
if (anEnum.bits == null) {
|
||||
docFreq += anEnum.termsEnum.docFreq();
|
||||
if (docsEnumFlag == DocsEnum.FLAG_FREQS) {
|
||||
long leafTotalTermFreq = anEnum.termsEnum.totalTermFreq();
|
||||
if (totalTermFreq == -1 || leafTotalTermFreq == -1) {
|
||||
totalTermFreq = -1;
|
||||
continue;
|
||||
}
|
||||
totalTermFreq += leafTotalTermFreq;
|
||||
}
|
||||
} else {
|
||||
for (int docId = docsEnum.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = docsEnum.nextDoc()) {
|
||||
// docsEnum.freq() behaviour is undefined if docsEnumFlag==DocsEnum.FLAG_NONE so don't bother with call
|
||||
docFreq++;
|
||||
final DocsEnum docsEnum = anEnum.docsEnum = anEnum.termsEnum.docs(anEnum.bits, anEnum.docsEnum, docsEnumFlag);
|
||||
// 2 choices for performing same heavy loop - one attempts to calculate totalTermFreq and other does not
|
||||
if (docsEnumFlag == DocsEnum.FLAG_FREQS) {
|
||||
for (int docId = docsEnum.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = docsEnum.nextDoc()) {
|
||||
docFreq++;
|
||||
// docsEnum.freq() returns 1 if doc indexed with IndexOptions.DOCS_ONLY so no way of knowing if value
|
||||
// is really 1 or unrecorded when filtering like this
|
||||
totalTermFreq += docsEnum.freq();
|
||||
}
|
||||
} else {
|
||||
for (int docId = docsEnum.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = docsEnum.nextDoc()) {
|
||||
// docsEnum.freq() behaviour is undefined if docsEnumFlag==DocsEnum.FLAG_NONE so don't bother with call
|
||||
docFreq++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (docFreq > 0) {
|
||||
currentDocFreq = docFreq;
|
||||
currentTotalTermFreq = totalTermFreq;
|
||||
current = text;
|
||||
return true;
|
||||
} else {
|
||||
currentDocFreq = NOT_FOUND;
|
||||
currentTotalTermFreq = NOT_FOUND;
|
||||
current = null;
|
||||
return false;
|
||||
}
|
||||
return found;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -76,22 +76,18 @@ public class FreqTermsEnum extends FilterableTermsEnum implements Releasable {
|
|||
boolean found = true;
|
||||
if (needDocFreqs) {
|
||||
currentDocFreq = termDocFreqs.get(currentTermOrd);
|
||||
if (currentDocFreq == NOT_FOUND) {
|
||||
found = false;
|
||||
}
|
||||
found = currentDocFreq != NOT_FOUND;
|
||||
}
|
||||
if (needTotalTermFreqs) {
|
||||
currentTotalTermFreq = termsTotalFreqs.get(currentTermOrd);
|
||||
if (currentTotalTermFreq == NOT_FOUND) {
|
||||
found = false;
|
||||
}
|
||||
found = currentTotalTermFreq != NOT_FOUND;
|
||||
}
|
||||
current = found ? text : null;
|
||||
return found;
|
||||
}
|
||||
|
||||
//Cache miss - gather stats
|
||||
boolean found = super.seekExact(text);
|
||||
final boolean found = super.seekExact(text);
|
||||
|
||||
//Cache the result - found or not.
|
||||
if (needDocFreqs) {
|
||||
|
|
Loading…
Reference in New Issue