LUCENE-4776: also do not cache huge arrays for non-existent/unindexed: just return EMPTY

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1445897 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2013-02-13 20:19:33 +00:00
parent fd3244cfea
commit dc7bec58b1
4 changed files with 239 additions and 26 deletions

View File

@ -52,37 +52,105 @@ public interface FieldCache {
public static abstract class Bytes {
  /**
   * Constant instance whose {@link #get(int)} returns {@code 0} for every
   * document.
   */
  public static final Bytes EMPTY = new Bytes() {
    @Override
    public byte get(int docID) {
      return 0;
    }
  };

  /**
   * Returns this field's value for the given document as a single byte.
   *
   * @param docID the document whose value is requested
   * @return the byte value for {@code docID}
   */
  public abstract byte get(int docID);
}
/**
 * Per-document field values exposed as 16-bit signed shorts.
 */
public static abstract class Shorts {
  /**
   * Constant instance whose {@link #get(int)} returns {@code 0} for every
   * document.
   */
  public static final Shorts EMPTY = new Shorts() {
    @Override
    public short get(int docID) {
      return 0;
    }
  };

  /**
   * Returns this field's value for the given document as a short.
   *
   * @param docID the document whose value is requested
   * @return the short value for {@code docID}
   */
  public abstract short get(int docID);
}
/**
 * Per-document field values exposed as 32-bit signed integers.
 */
public static abstract class Ints {
  /**
   * Constant instance whose {@link #get(int)} returns {@code 0} for every
   * document.
   */
  public static final Ints EMPTY = new Ints() {
    @Override
    public int get(int docID) {
      return 0;
    }
  };

  /**
   * Returns this field's value for the given document as an integer.
   *
   * @param docID the document whose value is requested
   * @return the int value for {@code docID}
   */
  public abstract int get(int docID);
}
/**
 * Per-document field values exposed as 64-bit signed long integers.
 */
public static abstract class Longs {
  /**
   * Constant instance whose {@link #get(int)} returns {@code 0} for every
   * document.
   */
  public static final Longs EMPTY = new Longs() {
    @Override
    public long get(int docID) {
      return 0;
    }
  };

  /**
   * Returns this field's value for the given document as a long.
   *
   * @param docID the document whose value is requested
   * @return the long value for {@code docID}
   */
  public abstract long get(int docID);
}
/**
 * Per-document field values exposed as 32-bit floats.
 */
public static abstract class Floats {
  /**
   * Constant instance whose {@link #get(int)} returns {@code 0} for every
   * document.
   */
  public static final Floats EMPTY = new Floats() {
    @Override
    public float get(int docID) {
      return 0;
    }
  };

  /**
   * Returns this field's value for the given document as a float.
   *
   * @param docID the document whose value is requested
   * @return the float value for {@code docID}
   */
  public abstract float get(int docID);
}
/**
 * Per-document field values exposed as 64-bit doubles.
 */
public static abstract class Doubles {
  /**
   * Constant instance whose {@link #get(int)} returns {@code 0} for every
   * document.
   */
  public static final Doubles EMPTY = new Doubles() {
    @Override
    public double get(int docID) {
      return 0;
    }
  };

  /**
   * Returns this field's value for the given document as a double.
   *
   * @param docID the document whose value is requested
   * @return the double value for {@code docID}
   */
  public abstract double get(int docID);
}
/**
 * Terms index without any values: every document maps to the MISSING
 * ordinal ({@code -1}) and the value count is zero.
 */
public static final SortedDocValues EMPTY_TERMSINDEX = new SortedDocValues() {
  @Override
  public int getValueCount() {
    return 0;
  }

  @Override
  public int getOrd(int docID) {
    return -1;
  }

  @Override
  public void lookupOrd(int ord, BytesRef result) {
    // Whatever ordinal is asked for, hand back the MISSING constant as a
    // zero-length slice starting at offset 0.
    result.length = 0;
    result.offset = 0;
    result.bytes = MISSING;
  }
};
/**
* Placeholder indicating creation of this cache is currently in-progress.

View File

@ -373,8 +373,12 @@ class FieldCacheImpl implements FieldCache {
};
} else {
final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
if (info != null && info.hasDocValues()) {
if (info == null) {
return Bytes.EMPTY;
} else if (info.hasDocValues()) {
throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
} else if (!info.isIndexed()) {
return Bytes.EMPTY;
}
return (Bytes) caches.get(Byte.TYPE).get(reader, new CacheKey(field, parser), setDocsWithField);
}
@ -463,8 +467,12 @@ class FieldCacheImpl implements FieldCache {
};
} else {
final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
if (info != null && info.hasDocValues()) {
if (info == null) {
return Shorts.EMPTY;
} else if (info.hasDocValues()) {
throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
} else if (!info.isIndexed()) {
return Shorts.EMPTY;
}
return (Shorts) caches.get(Short.TYPE).get(reader, new CacheKey(field, parser), setDocsWithField);
}
@ -551,8 +559,12 @@ class FieldCacheImpl implements FieldCache {
};
} else {
final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
if (info != null && info.hasDocValues()) {
if (info == null) {
return Ints.EMPTY;
} else if (info.hasDocValues()) {
throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
} else if (!info.isIndexed()) {
return Ints.EMPTY;
}
return (Ints) caches.get(Integer.TYPE).get(reader, new CacheKey(field, parser), setDocsWithField);
}
@ -649,8 +661,17 @@ class FieldCacheImpl implements FieldCache {
}
}
public Bits getDocsWithField(AtomicReader reader, String field)
throws IOException {
public Bits getDocsWithField(AtomicReader reader, String field) throws IOException {
  final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
  if (info != null && info.hasDocValues()) {
    // doc values are dense
    return new Bits.MatchAllBits(reader.maxDoc());
  }
  if (info == null || !info.isIndexed()) {
    // no FieldInfo for this field, or the field is not indexed:
    // answer directly instead of going through the cache
    return new Bits.MatchNoBits(reader.maxDoc());
  }
  // indexed field without doc values: delegate to the DocsWithFieldCache entry
  return (Bits) caches.get(DocsWithFieldCache.class).get(reader, new CacheKey(field, null), false);
}
@ -663,17 +684,8 @@ class FieldCacheImpl implements FieldCache {
protected Object createValue(AtomicReader reader, CacheKey key, boolean setDocsWithField /* ignored */)
throws IOException {
final String field = key.field;
final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
final int maxDoc = reader.maxDoc();
if (fieldInfo == null) {
// field does not exist or has no value
return new Bits.MatchNoBits(maxDoc);
} else if (fieldInfo.hasDocValues()) {
// doc values are dense
return new Bits.MatchAllBits(maxDoc);
}
// Visit all docs that have terms for this field
FixedBitSet res = null;
Terms terms = reader.terms(field);
@ -741,8 +753,12 @@ class FieldCacheImpl implements FieldCache {
};
} else {
final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
if (info != null && info.hasDocValues()) {
if (info == null) {
return Floats.EMPTY;
} else if (info.hasDocValues()) {
throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
} else if (!info.isIndexed()) {
return Floats.EMPTY;
}
return (Floats) caches.get(Float.TYPE).get(reader, new CacheKey(field, parser), setDocsWithField);
}
@ -848,8 +864,12 @@ class FieldCacheImpl implements FieldCache {
};
} else {
final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
if (info != null && info.hasDocValues()) {
if (info == null) {
return Longs.EMPTY;
} else if (info.hasDocValues()) {
throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
} else if (!info.isIndexed()) {
return Longs.EMPTY;
}
return (Longs) caches.get(Long.TYPE).get(reader, new CacheKey(field, parser), setDocsWithField);
}
@ -955,8 +975,12 @@ class FieldCacheImpl implements FieldCache {
};
} else {
final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
if (info != null && info.hasDocValues()) {
if (info == null) {
return Doubles.EMPTY;
} else if (info.hasDocValues()) {
throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
} else if (!info.isIndexed()) {
return Doubles.EMPTY;
}
return (Doubles) caches.get(Double.TYPE).get(reader, new CacheKey(field, parser), setDocsWithField);
}
@ -1088,14 +1112,14 @@ class FieldCacheImpl implements FieldCache {
return valuesIn;
} else {
final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
if (info != null && info.hasDocValues()) {
if (info == null) {
return EMPTY_TERMSINDEX;
} else if (info.hasDocValues()) {
// we don't try to build a sorted instance from numeric/binary doc
// values because dedup can be very costly
throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
}
if (info != null && !info.isIndexed()) {
throw new IllegalArgumentException("Cannot get terms index for \"" + field
+ "\": it isn't indexed and doesn't have sorted doc values");
} else if (!info.isIndexed()) {
return EMPTY_TERMSINDEX;
}
return (SortedDocValues) caches.get(SortedDocValues.class).get(reader, new CacheKey(field, acceptableOverheadRatio), false);
}
@ -1248,8 +1272,12 @@ class FieldCacheImpl implements FieldCache {
}
final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
if (info != null && info.hasDocValues()) {
if (info == null) {
return BinaryDocValues.EMPTY;
} else if (info.hasDocValues()) {
throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
} else if (!info.isIndexed()) {
return BinaryDocValues.EMPTY;
}
return (BinaryDocValues) caches.get(BinaryDocValues.class).get(reader, new CacheKey(field, acceptableOverheadRatio), false);

View File

@ -35,9 +35,15 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.*;
import org.apache.lucene.search.FieldCache.Bytes;
import org.apache.lucene.search.FieldCache.Doubles;
import org.apache.lucene.search.FieldCache.Floats;
import org.apache.lucene.search.FieldCache.Ints;
import org.apache.lucene.search.FieldCache.Longs;
import org.apache.lucene.search.FieldCache.Shorts;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@ -507,4 +513,115 @@ public class TestFieldCache extends LuceneTestCase {
ir.close();
dir.close();
}
/**
 * Requests every FieldCache entry type for field names that were never added
 * to the index, checks that each lookup yields the zero/MISSING/no-bits value
 * for document 0, and checks that the cache holds no entries afterwards.
 */
public void testNonexistantFields() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
  // The only document carries no fields at all.
  Document doc = new Document();
  iw.addDocument(doc);
  DirectoryReader ir = iw.getReader();
  iw.close();

  AtomicReader ar = getOnlySegmentReader(ir);

  final FieldCache cache = FieldCache.DEFAULT;
  cache.purgeAllCaches();
  assertEquals(0, cache.getCacheEntries().length);

  Bytes bytes = cache.getBytes(ar, "bogusbytes", true);
  assertEquals(0, bytes.get(0));

  Shorts shorts = cache.getShorts(ar, "bogusshorts", true);
  assertEquals(0, shorts.get(0));

  Ints ints = cache.getInts(ar, "bogusints", true);
  assertEquals(0, ints.get(0));

  Longs longs = cache.getLongs(ar, "boguslongs", true);
  assertEquals(0, longs.get(0));

  Floats floats = cache.getFloats(ar, "bogusfloats", true);
  assertEquals(0, floats.get(0), 0.0f);

  Doubles doubles = cache.getDoubles(ar, "bogusdoubles", true);
  assertEquals(0, doubles.get(0), 0.0D);

  BytesRef scratch = new BytesRef();
  BinaryDocValues binaries = cache.getTerms(ar, "bogusterms");
  binaries.get(0, scratch);
  assertSame(BinaryDocValues.MISSING, scratch.bytes);

  SortedDocValues sorted = cache.getTermsIndex(ar, "bogustermsindex");
  assertEquals(-1, sorted.getOrd(0));
  // Use a fresh BytesRef: the previous one already points at MISSING, so
  // reusing it would let this check pass even if get() wrote nothing.
  BytesRef sortedScratch = new BytesRef();
  sorted.get(0, sortedScratch);
  assertSame(BinaryDocValues.MISSING, sortedScratch.bytes);

  Bits bits = cache.getDocsWithField(ar, "bogusbits");
  assertFalse(bits.get(0));

  // check that we cached nothing
  assertEquals(0, cache.getCacheEntries().length);
  ir.close();
  dir.close();
}
/**
 * Requests every FieldCache entry type for fields that were added only as
 * {@link StoredField}s, checks that each lookup yields the
 * zero/MISSING/no-bits value for document 0, and checks that the cache holds
 * no entries afterwards.
 */
public void testNonIndexedFields() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
  // Every field queried below exists in the document, but only as a StoredField.
  Document doc = new Document();
  doc.add(new StoredField("bogusbytes", "bogus"));
  doc.add(new StoredField("bogusshorts", "bogus"));
  doc.add(new StoredField("bogusints", "bogus"));
  doc.add(new StoredField("boguslongs", "bogus"));
  doc.add(new StoredField("bogusfloats", "bogus"));
  doc.add(new StoredField("bogusdoubles", "bogus"));
  doc.add(new StoredField("bogusterms", "bogus"));
  doc.add(new StoredField("bogustermsindex", "bogus"));
  doc.add(new StoredField("bogusbits", "bogus"));
  iw.addDocument(doc);
  DirectoryReader ir = iw.getReader();
  iw.close();

  AtomicReader ar = getOnlySegmentReader(ir);

  final FieldCache cache = FieldCache.DEFAULT;
  cache.purgeAllCaches();
  assertEquals(0, cache.getCacheEntries().length);

  Bytes bytes = cache.getBytes(ar, "bogusbytes", true);
  assertEquals(0, bytes.get(0));

  Shorts shorts = cache.getShorts(ar, "bogusshorts", true);
  assertEquals(0, shorts.get(0));

  Ints ints = cache.getInts(ar, "bogusints", true);
  assertEquals(0, ints.get(0));

  Longs longs = cache.getLongs(ar, "boguslongs", true);
  assertEquals(0, longs.get(0));

  Floats floats = cache.getFloats(ar, "bogusfloats", true);
  assertEquals(0, floats.get(0), 0.0f);

  Doubles doubles = cache.getDoubles(ar, "bogusdoubles", true);
  assertEquals(0, doubles.get(0), 0.0D);

  BytesRef scratch = new BytesRef();
  BinaryDocValues binaries = cache.getTerms(ar, "bogusterms");
  binaries.get(0, scratch);
  assertSame(BinaryDocValues.MISSING, scratch.bytes);

  SortedDocValues sorted = cache.getTermsIndex(ar, "bogustermsindex");
  assertEquals(-1, sorted.getOrd(0));
  // Use a fresh BytesRef: the previous one already points at MISSING, so
  // reusing it would let this check pass even if get() wrote nothing.
  BytesRef sortedScratch = new BytesRef();
  sorted.get(0, sortedScratch);
  assertSame(BinaryDocValues.MISSING, sortedScratch.bytes);

  Bits bits = cache.getDocsWithField(ar, "bogusbits");
  assertFalse(bits.get(0));

  // check that we cached nothing
  assertEquals(0, cache.getCacheEntries().length);
  ir.close();
  dir.close();
}
}

View File

@ -143,9 +143,9 @@ public class TestFieldCacheSanityChecker extends LuceneTestCase {
FieldCache cache = FieldCache.DEFAULT;
cache.purgeAllCaches();
cache.getTerms(readerA, "theString");
cache.getTerms(readerB, "theString");
cache.getTerms(readerX, "theString");
cache.getTerms(readerA, "theInt");
cache.getTerms(readerB, "theInt");
cache.getTerms(readerX, "theInt");
cache.getBytes(readerX, "theByte", false);