mirror of https://github.com/apache/lucene.git
LUCENE-4776: also do not cache huge arrays for non-existent/unindexed: just return EMPTY
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1445897 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
fd3244cfea
commit
dc7bec58b1
|
@ -52,37 +52,105 @@ public interface FieldCache {
|
|||
public static abstract class Bytes {
|
||||
/** Return a single Byte representation of this field's value. */
|
||||
public abstract byte get(int docID);
|
||||
|
||||
/** Zero value for every document */
|
||||
public static final Bytes EMPTY = new Bytes() {
|
||||
@Override
|
||||
public byte get(int docID) {
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/** Field values as 16-bit signed shorts */
|
||||
public static abstract class Shorts {
|
||||
/** Return a short representation of this field's value. */
|
||||
public abstract short get(int docID);
|
||||
|
||||
/** Zero value for every document */
|
||||
public static final Shorts EMPTY = new Shorts() {
|
||||
@Override
|
||||
public short get(int docID) {
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/** Field values as 32-bit signed integers */
|
||||
public static abstract class Ints {
|
||||
/** Return an integer representation of this field's value. */
|
||||
public abstract int get(int docID);
|
||||
|
||||
/** Zero value for every document */
|
||||
public static final Ints EMPTY = new Ints() {
|
||||
@Override
|
||||
public int get(int docID) {
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/** Field values as 32-bit signed long integers */
|
||||
public static abstract class Longs {
|
||||
/** Return an long representation of this field's value. */
|
||||
public abstract long get(int docID);
|
||||
|
||||
/** Zero value for every document */
|
||||
public static final Longs EMPTY = new Longs() {
|
||||
@Override
|
||||
public long get(int docID) {
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/** Field values as 32-bit floats */
|
||||
public static abstract class Floats {
|
||||
/** Return an float representation of this field's value. */
|
||||
public abstract float get(int docID);
|
||||
|
||||
/** Zero value for every document */
|
||||
public static final Floats EMPTY = new Floats() {
|
||||
@Override
|
||||
public float get(int docID) {
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/** Field values as 64-bit doubles */
|
||||
public static abstract class Doubles {
|
||||
/** Return an double representation of this field's value. */
|
||||
public abstract double get(int docID);
|
||||
|
||||
/** Zero value for every document */
|
||||
public static final Doubles EMPTY = new Doubles() {
|
||||
@Override
|
||||
public double get(int docID) {
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/** Returns MISSING/-1 ordinal for every document */
|
||||
public static final SortedDocValues EMPTY_TERMSINDEX = new SortedDocValues() {
|
||||
@Override
|
||||
public int getOrd(int docID) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void lookupOrd(int ord, BytesRef result) {
|
||||
result.bytes = MISSING;
|
||||
result.offset = 0;
|
||||
result.length = 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getValueCount() {
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Placeholder indicating creation of this cache is currently in-progress.
|
||||
|
|
|
@ -373,8 +373,12 @@ class FieldCacheImpl implements FieldCache {
|
|||
};
|
||||
} else {
|
||||
final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
|
||||
if (info != null && info.hasDocValues()) {
|
||||
if (info == null) {
|
||||
return Bytes.EMPTY;
|
||||
} else if (info.hasDocValues()) {
|
||||
throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
|
||||
} else if (!info.isIndexed()) {
|
||||
return Bytes.EMPTY;
|
||||
}
|
||||
return (Bytes) caches.get(Byte.TYPE).get(reader, new CacheKey(field, parser), setDocsWithField);
|
||||
}
|
||||
|
@ -463,8 +467,12 @@ class FieldCacheImpl implements FieldCache {
|
|||
};
|
||||
} else {
|
||||
final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
|
||||
if (info != null && info.hasDocValues()) {
|
||||
if (info == null) {
|
||||
return Shorts.EMPTY;
|
||||
} else if (info.hasDocValues()) {
|
||||
throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
|
||||
} else if (!info.isIndexed()) {
|
||||
return Shorts.EMPTY;
|
||||
}
|
||||
return (Shorts) caches.get(Short.TYPE).get(reader, new CacheKey(field, parser), setDocsWithField);
|
||||
}
|
||||
|
@ -551,8 +559,12 @@ class FieldCacheImpl implements FieldCache {
|
|||
};
|
||||
} else {
|
||||
final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
|
||||
if (info != null && info.hasDocValues()) {
|
||||
if (info == null) {
|
||||
return Ints.EMPTY;
|
||||
} else if (info.hasDocValues()) {
|
||||
throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
|
||||
} else if (!info.isIndexed()) {
|
||||
return Ints.EMPTY;
|
||||
}
|
||||
return (Ints) caches.get(Integer.TYPE).get(reader, new CacheKey(field, parser), setDocsWithField);
|
||||
}
|
||||
|
@ -649,8 +661,17 @@ class FieldCacheImpl implements FieldCache {
|
|||
}
|
||||
}
|
||||
|
||||
public Bits getDocsWithField(AtomicReader reader, String field)
|
||||
throws IOException {
|
||||
public Bits getDocsWithField(AtomicReader reader, String field) throws IOException {
|
||||
final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
|
||||
if (fieldInfo == null) {
|
||||
// field does not exist or has no value
|
||||
return new Bits.MatchNoBits(reader.maxDoc());
|
||||
} else if (fieldInfo.hasDocValues()) {
|
||||
// doc values are dense
|
||||
return new Bits.MatchAllBits(reader.maxDoc());
|
||||
} else if (!fieldInfo.isIndexed()) {
|
||||
return new Bits.MatchNoBits(reader.maxDoc());
|
||||
}
|
||||
return (Bits) caches.get(DocsWithFieldCache.class).get(reader, new CacheKey(field, null), false);
|
||||
}
|
||||
|
||||
|
@ -663,17 +684,8 @@ class FieldCacheImpl implements FieldCache {
|
|||
protected Object createValue(AtomicReader reader, CacheKey key, boolean setDocsWithField /* ignored */)
|
||||
throws IOException {
|
||||
final String field = key.field;
|
||||
final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
|
||||
final int maxDoc = reader.maxDoc();
|
||||
|
||||
if (fieldInfo == null) {
|
||||
// field does not exist or has no value
|
||||
return new Bits.MatchNoBits(maxDoc);
|
||||
} else if (fieldInfo.hasDocValues()) {
|
||||
// doc values are dense
|
||||
return new Bits.MatchAllBits(maxDoc);
|
||||
}
|
||||
|
||||
// Visit all docs that have terms for this field
|
||||
FixedBitSet res = null;
|
||||
Terms terms = reader.terms(field);
|
||||
|
@ -741,8 +753,12 @@ class FieldCacheImpl implements FieldCache {
|
|||
};
|
||||
} else {
|
||||
final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
|
||||
if (info != null && info.hasDocValues()) {
|
||||
if (info == null) {
|
||||
return Floats.EMPTY;
|
||||
} else if (info.hasDocValues()) {
|
||||
throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
|
||||
} else if (!info.isIndexed()) {
|
||||
return Floats.EMPTY;
|
||||
}
|
||||
return (Floats) caches.get(Float.TYPE).get(reader, new CacheKey(field, parser), setDocsWithField);
|
||||
}
|
||||
|
@ -848,8 +864,12 @@ class FieldCacheImpl implements FieldCache {
|
|||
};
|
||||
} else {
|
||||
final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
|
||||
if (info != null && info.hasDocValues()) {
|
||||
if (info == null) {
|
||||
return Longs.EMPTY;
|
||||
} else if (info.hasDocValues()) {
|
||||
throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
|
||||
} else if (!info.isIndexed()) {
|
||||
return Longs.EMPTY;
|
||||
}
|
||||
return (Longs) caches.get(Long.TYPE).get(reader, new CacheKey(field, parser), setDocsWithField);
|
||||
}
|
||||
|
@ -955,8 +975,12 @@ class FieldCacheImpl implements FieldCache {
|
|||
};
|
||||
} else {
|
||||
final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
|
||||
if (info != null && info.hasDocValues()) {
|
||||
if (info == null) {
|
||||
return Doubles.EMPTY;
|
||||
} else if (info.hasDocValues()) {
|
||||
throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
|
||||
} else if (!info.isIndexed()) {
|
||||
return Doubles.EMPTY;
|
||||
}
|
||||
return (Doubles) caches.get(Double.TYPE).get(reader, new CacheKey(field, parser), setDocsWithField);
|
||||
}
|
||||
|
@ -1088,14 +1112,14 @@ class FieldCacheImpl implements FieldCache {
|
|||
return valuesIn;
|
||||
} else {
|
||||
final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
|
||||
if (info != null && info.hasDocValues()) {
|
||||
if (info == null) {
|
||||
return EMPTY_TERMSINDEX;
|
||||
} else if (info.hasDocValues()) {
|
||||
// we don't try to build a sorted instance from numeric/binary doc
|
||||
// values because dedup can be very costly
|
||||
throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
|
||||
}
|
||||
if (info != null && !info.isIndexed()) {
|
||||
throw new IllegalArgumentException("Cannot get terms index for \"" + field
|
||||
+ "\": it isn't indexed and doesn't have sorted doc values");
|
||||
} else if (!info.isIndexed()) {
|
||||
return EMPTY_TERMSINDEX;
|
||||
}
|
||||
return (SortedDocValues) caches.get(SortedDocValues.class).get(reader, new CacheKey(field, acceptableOverheadRatio), false);
|
||||
}
|
||||
|
@ -1248,8 +1272,12 @@ class FieldCacheImpl implements FieldCache {
|
|||
}
|
||||
|
||||
final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
|
||||
if (info != null && info.hasDocValues()) {
|
||||
if (info == null) {
|
||||
return BinaryDocValues.EMPTY;
|
||||
} else if (info.hasDocValues()) {
|
||||
throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
|
||||
} else if (!info.isIndexed()) {
|
||||
return BinaryDocValues.EMPTY;
|
||||
}
|
||||
|
||||
return (BinaryDocValues) caches.get(BinaryDocValues.class).get(reader, new CacheKey(field, acceptableOverheadRatio), false);
|
||||
|
|
|
@ -35,9 +35,15 @@ import org.apache.lucene.document.Field;
|
|||
import org.apache.lucene.document.IntField;
|
||||
import org.apache.lucene.document.NumericDocValuesField;
|
||||
import org.apache.lucene.document.SortedDocValuesField;
|
||||
import org.apache.lucene.document.StoredField;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.index.*;
|
||||
import org.apache.lucene.search.FieldCache.Bytes;
|
||||
import org.apache.lucene.search.FieldCache.Doubles;
|
||||
import org.apache.lucene.search.FieldCache.Floats;
|
||||
import org.apache.lucene.search.FieldCache.Ints;
|
||||
import org.apache.lucene.search.FieldCache.Longs;
|
||||
import org.apache.lucene.search.FieldCache.Shorts;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
@ -507,4 +513,115 @@ public class TestFieldCache extends LuceneTestCase {
|
|||
ir.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testNonexistantFields() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
|
||||
Document doc = new Document();
|
||||
iw.addDocument(doc);
|
||||
DirectoryReader ir = iw.getReader();
|
||||
iw.close();
|
||||
|
||||
AtomicReader ar = getOnlySegmentReader(ir);
|
||||
|
||||
final FieldCache cache = FieldCache.DEFAULT;
|
||||
cache.purgeAllCaches();
|
||||
assertEquals(0, cache.getCacheEntries().length);
|
||||
|
||||
Bytes bytes = cache.getBytes(ar, "bogusbytes", true);
|
||||
assertEquals(0, bytes.get(0));
|
||||
|
||||
Shorts shorts = cache.getShorts(ar, "bogusshorts", true);
|
||||
assertEquals(0, shorts.get(0));
|
||||
|
||||
Ints ints = cache.getInts(ar, "bogusints", true);
|
||||
assertEquals(0, ints.get(0));
|
||||
|
||||
Longs longs = cache.getLongs(ar, "boguslongs", true);
|
||||
assertEquals(0, longs.get(0));
|
||||
|
||||
Floats floats = cache.getFloats(ar, "bogusfloats", true);
|
||||
assertEquals(0, floats.get(0), 0.0f);
|
||||
|
||||
Doubles doubles = cache.getDoubles(ar, "bogusdoubles", true);
|
||||
assertEquals(0, doubles.get(0), 0.0D);
|
||||
|
||||
BytesRef scratch = new BytesRef();
|
||||
BinaryDocValues binaries = cache.getTerms(ar, "bogusterms");
|
||||
binaries.get(0, scratch);
|
||||
assertTrue(scratch.bytes == BinaryDocValues.MISSING);
|
||||
|
||||
SortedDocValues sorted = cache.getTermsIndex(ar, "bogustermsindex");
|
||||
assertEquals(-1, sorted.getOrd(0));
|
||||
sorted.get(0, scratch);
|
||||
assertTrue(scratch.bytes == BinaryDocValues.MISSING);
|
||||
|
||||
Bits bits = cache.getDocsWithField(ar, "bogusbits");
|
||||
assertFalse(bits.get(0));
|
||||
|
||||
// check that we cached nothing
|
||||
assertEquals(0, cache.getCacheEntries().length);
|
||||
ir.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testNonIndexedFields() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
|
||||
Document doc = new Document();
|
||||
doc.add(new StoredField("bogusbytes", "bogus"));
|
||||
doc.add(new StoredField("bogusshorts", "bogus"));
|
||||
doc.add(new StoredField("bogusints", "bogus"));
|
||||
doc.add(new StoredField("boguslongs", "bogus"));
|
||||
doc.add(new StoredField("bogusfloats", "bogus"));
|
||||
doc.add(new StoredField("bogusdoubles", "bogus"));
|
||||
doc.add(new StoredField("bogusterms", "bogus"));
|
||||
doc.add(new StoredField("bogustermsindex", "bogus"));
|
||||
doc.add(new StoredField("bogusbits", "bogus"));
|
||||
iw.addDocument(doc);
|
||||
DirectoryReader ir = iw.getReader();
|
||||
iw.close();
|
||||
|
||||
AtomicReader ar = getOnlySegmentReader(ir);
|
||||
|
||||
final FieldCache cache = FieldCache.DEFAULT;
|
||||
cache.purgeAllCaches();
|
||||
assertEquals(0, cache.getCacheEntries().length);
|
||||
|
||||
Bytes bytes = cache.getBytes(ar, "bogusbytes", true);
|
||||
assertEquals(0, bytes.get(0));
|
||||
|
||||
Shorts shorts = cache.getShorts(ar, "bogusshorts", true);
|
||||
assertEquals(0, shorts.get(0));
|
||||
|
||||
Ints ints = cache.getInts(ar, "bogusints", true);
|
||||
assertEquals(0, ints.get(0));
|
||||
|
||||
Longs longs = cache.getLongs(ar, "boguslongs", true);
|
||||
assertEquals(0, longs.get(0));
|
||||
|
||||
Floats floats = cache.getFloats(ar, "bogusfloats", true);
|
||||
assertEquals(0, floats.get(0), 0.0f);
|
||||
|
||||
Doubles doubles = cache.getDoubles(ar, "bogusdoubles", true);
|
||||
assertEquals(0, doubles.get(0), 0.0D);
|
||||
|
||||
BytesRef scratch = new BytesRef();
|
||||
BinaryDocValues binaries = cache.getTerms(ar, "bogusterms");
|
||||
binaries.get(0, scratch);
|
||||
assertTrue(scratch.bytes == BinaryDocValues.MISSING);
|
||||
|
||||
SortedDocValues sorted = cache.getTermsIndex(ar, "bogustermsindex");
|
||||
assertEquals(-1, sorted.getOrd(0));
|
||||
sorted.get(0, scratch);
|
||||
assertTrue(scratch.bytes == BinaryDocValues.MISSING);
|
||||
|
||||
Bits bits = cache.getDocsWithField(ar, "bogusbits");
|
||||
assertFalse(bits.get(0));
|
||||
|
||||
// check that we cached nothing
|
||||
assertEquals(0, cache.getCacheEntries().length);
|
||||
ir.close();
|
||||
dir.close();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -143,9 +143,9 @@ public class TestFieldCacheSanityChecker extends LuceneTestCase {
|
|||
FieldCache cache = FieldCache.DEFAULT;
|
||||
cache.purgeAllCaches();
|
||||
|
||||
cache.getTerms(readerA, "theString");
|
||||
cache.getTerms(readerB, "theString");
|
||||
cache.getTerms(readerX, "theString");
|
||||
cache.getTerms(readerA, "theInt");
|
||||
cache.getTerms(readerB, "theInt");
|
||||
cache.getTerms(readerX, "theInt");
|
||||
|
||||
cache.getBytes(readerX, "theByte", false);
|
||||
|
||||
|
|
Loading…
Reference in New Issue