LUCENE-2142: if FieldCache.getStrings/Index is misused (more than 1 term per doc), stop loading once the number of terms exceeds the number of docs

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@957516 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2010-06-24 11:57:20 +00:00
parent 18d21f9f11
commit 50ab75baa3
1 changed file with 34 additions and 3 deletions

View File

@ -802,6 +802,14 @@ class FieldCacheImpl implements FieldCache {
int startTermsBPV;
int startNumUniqueTerms;
int maxDoc = reader.maxDoc();
final int termCountHardLimit;
if (maxDoc == Integer.MAX_VALUE) {
termCountHardLimit = Integer.MAX_VALUE;
} else {
termCountHardLimit = maxDoc+1;
}
if (terms != null) {
// Try for coarse estimate for number of bits; this
// should be an underestimate most of the time, which
@ -813,11 +821,17 @@ class FieldCacheImpl implements FieldCache {
numUniqueTerms = -1;
}
if (numUniqueTerms != -1) {
if (numUniqueTerms > termCountHardLimit) {
// app is misusing the API (there is more than
// one term per doc); in this case we make best
// effort to load what we can (see LUCENE-2142)
numUniqueTerms = termCountHardLimit;
}
startBytesBPV = PackedInts.bitsRequired(numUniqueTerms*4);
startTermsBPV = PackedInts.bitsRequired(numUniqueTerms);
if (numUniqueTerms > Integer.MAX_VALUE-1) {
throw new IllegalStateException("this field has too many (" + numUniqueTerms + ") unique terms");
}
startNumUniqueTerms = (int) numUniqueTerms;
} else {
startBytesBPV = 1;
@ -847,6 +861,10 @@ class FieldCacheImpl implements FieldCache {
if (term == null) {
break;
}
if (termOrd >= termCountHardLimit) {
break;
}
if (termOrd == termOrdToBytesOffset.size()) {
// NOTE: this code only runs if the incoming
// reader impl doesn't implement
@ -925,6 +943,8 @@ class FieldCacheImpl implements FieldCache {
final boolean fasterButMoreRAM = ((Boolean) entryKey.custom).booleanValue();
final int termCountHardLimit = reader.maxDoc();
// Holds the actual term data, expanded.
final PagedBytes bytes = new PagedBytes(15);
@ -941,6 +961,9 @@ class FieldCacheImpl implements FieldCache {
numUniqueTerms = -1;
}
if (numUniqueTerms != -1) {
if (numUniqueTerms > termCountHardLimit) {
numUniqueTerms = termCountHardLimit;
}
startBPV = PackedInts.bitsRequired(numUniqueTerms*4);
} else {
startBPV = 1;
@ -955,10 +978,18 @@ class FieldCacheImpl implements FieldCache {
bytes.copyUsingLengthPrefix(new BytesRef());
if (terms != null) {
int termCount = 0;
final TermsEnum termsEnum = terms.iterator();
final Bits delDocs = MultiFields.getDeletedDocs(reader);
DocsEnum docs = null;
while(true) {
if (termCount++ == termCountHardLimit) {
// app is misusing the API (there is more than
// one term per doc); in this case we make best
// effort to load what we can (see LUCENE-2142)
break;
}
final BytesRef term = termsEnum.next();
if (term == null) {
break;