From dc7bec58b16c9d750126eb8efb8bdeefa57023e5 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Wed, 13 Feb 2013 20:19:33 +0000 Subject: [PATCH] LUCENE-4776: also do not cache huge arrays for non-existent/unindexed: just return EMPTY git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1445897 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/lucene/search/FieldCache.java | 68 ++++++++++ .../apache/lucene/search/FieldCacheImpl.java | 74 +++++++---- .../apache/lucene/search/TestFieldCache.java | 117 ++++++++++++++++++ .../util/TestFieldCacheSanityChecker.java | 6 +- 4 files changed, 239 insertions(+), 26 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/FieldCache.java b/lucene/core/src/java/org/apache/lucene/search/FieldCache.java index 1b08ebe393a..da3590c87d2 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FieldCache.java +++ b/lucene/core/src/java/org/apache/lucene/search/FieldCache.java @@ -52,37 +52,105 @@ public interface FieldCache { public static abstract class Bytes { /** Return a single Byte representation of this field's value. */ public abstract byte get(int docID); + + /** Zero value for every document */ + public static final Bytes EMPTY = new Bytes() { + @Override + public byte get(int docID) { + return 0; + } + }; } /** Field values as 16-bit signed shorts */ public static abstract class Shorts { /** Return a short representation of this field's value. */ public abstract short get(int docID); + + /** Zero value for every document */ + public static final Shorts EMPTY = new Shorts() { + @Override + public short get(int docID) { + return 0; + } + }; } /** Field values as 32-bit signed integers */ public static abstract class Ints { /** Return an integer representation of this field's value. */ public abstract int get(int docID); + + /** Zero value for every document */ + public static final Ints EMPTY = new Ints() { + @Override + public int get(int docID) { + return 0; + } + }; } /** Field values as 32-bit signed long integers */ public static abstract class Longs { /** Return an long representation of this field's value. */ public abstract long get(int docID); + + /** Zero value for every document */ + public static final Longs EMPTY = new Longs() { + @Override + public long get(int docID) { + return 0; + } + }; } /** Field values as 32-bit floats */ public static abstract class Floats { /** Return an float representation of this field's value. */ public abstract float get(int docID); + + /** Zero value for every document */ + public static final Floats EMPTY = new Floats() { + @Override + public float get(int docID) { + return 0; + } + }; } /** Field values as 64-bit doubles */ public static abstract class Doubles { /** Return an double representation of this field's value. */ public abstract double get(int docID); + + /** Zero value for every document */ + public static final Doubles EMPTY = new Doubles() { + @Override + public double get(int docID) { + return 0; + } + }; } + + /** Returns MISSING/-1 ordinal for every document */ + public static final SortedDocValues EMPTY_TERMSINDEX = new SortedDocValues() { + @Override + public int getOrd(int docID) { + return -1; + } + + @Override + public void lookupOrd(int ord, BytesRef result) { + result.bytes = MISSING; + result.offset = 0; + result.length = 0; + } + + @Override + public int getValueCount() { + return 0; + } + }; /** * Placeholder indicating creation of this cache is currently in-progress. diff --git a/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java b/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java index 01227a84f0f..5b3ed3ff904 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java +++ b/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java @@ -373,8 +373,12 @@ class FieldCacheImpl implements FieldCache { }; } else { final FieldInfo info = reader.getFieldInfos().fieldInfo(field); - if (info != null && info.hasDocValues()) { + if (info == null) { + return Bytes.EMPTY; + } else if (info.hasDocValues()) { throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType()); + } else if (!info.isIndexed()) { + return Bytes.EMPTY; } return (Bytes) caches.get(Byte.TYPE).get(reader, new CacheKey(field, parser), setDocsWithField); } @@ -463,8 +467,12 @@ class FieldCacheImpl implements FieldCache { }; } else { final FieldInfo info = reader.getFieldInfos().fieldInfo(field); - if (info != null && info.hasDocValues()) { + if (info == null) { + return Shorts.EMPTY; + } else if (info.hasDocValues()) { throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType()); + } else if (!info.isIndexed()) { + return Shorts.EMPTY; } return (Shorts) caches.get(Short.TYPE).get(reader, new CacheKey(field, parser), setDocsWithField); } @@ -551,8 +559,12 @@ class FieldCacheImpl implements FieldCache { }; } else { final FieldInfo info = reader.getFieldInfos().fieldInfo(field); - if (info != null && info.hasDocValues()) { + if (info == null) { + return Ints.EMPTY; + } else if (info.hasDocValues()) { throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType()); + } else if (!info.isIndexed()) { + return Ints.EMPTY; } return (Ints) caches.get(Integer.TYPE).get(reader, new CacheKey(field, parser), setDocsWithField); } @@ -649,8 +661,17 @@ class FieldCacheImpl implements FieldCache { } } - public Bits getDocsWithField(AtomicReader reader, String field) - throws IOException { + public Bits getDocsWithField(AtomicReader reader, String field) throws IOException { + final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field); + if (fieldInfo == null) { + // field does not exist or has no value + return new Bits.MatchNoBits(reader.maxDoc()); + } else if (fieldInfo.hasDocValues()) { + // doc values are dense + return new Bits.MatchAllBits(reader.maxDoc()); + } else if (!fieldInfo.isIndexed()) { + return new Bits.MatchNoBits(reader.maxDoc()); + } return (Bits) caches.get(DocsWithFieldCache.class).get(reader, new CacheKey(field, null), false); } @@ -663,17 +684,8 @@ class FieldCacheImpl implements FieldCache { protected Object createValue(AtomicReader reader, CacheKey key, boolean setDocsWithField /* ignored */) throws IOException { final String field = key.field; - final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field); final int maxDoc = reader.maxDoc(); - if (fieldInfo == null) { - // field does not exist or has no value - return new Bits.MatchNoBits(maxDoc); - } else if (fieldInfo.hasDocValues()) { - // doc values are dense - return new Bits.MatchAllBits(maxDoc); - } - // Visit all docs that have terms for this field FixedBitSet res = null; Terms terms = reader.terms(field); @@ -741,8 +753,12 @@ class FieldCacheImpl implements FieldCache { }; } else { final FieldInfo info = reader.getFieldInfos().fieldInfo(field); - if (info != null && info.hasDocValues()) { + if (info == null) { + return Floats.EMPTY; + } else if (info.hasDocValues()) { throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType()); + } else if (!info.isIndexed()) { + return Floats.EMPTY; } return (Floats) caches.get(Float.TYPE).get(reader, new CacheKey(field, parser), setDocsWithField); } @@ -848,8 +864,12 @@ class FieldCacheImpl implements FieldCache { }; } else { final FieldInfo info = reader.getFieldInfos().fieldInfo(field); - if (info != null && info.hasDocValues()) { + if (info == null) { + return Longs.EMPTY; + } else if (info.hasDocValues()) { throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType()); + } else if (!info.isIndexed()) { + return Longs.EMPTY; } return (Longs) caches.get(Long.TYPE).get(reader, new CacheKey(field, parser), setDocsWithField); } @@ -955,8 +975,12 @@ class FieldCacheImpl implements FieldCache { }; } else { final FieldInfo info = reader.getFieldInfos().fieldInfo(field); - if (info != null && info.hasDocValues()) { + if (info == null) { + return Doubles.EMPTY; + } else if (info.hasDocValues()) { throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType()); + } else if (!info.isIndexed()) { + return Doubles.EMPTY; } return (Doubles) caches.get(Double.TYPE).get(reader, new CacheKey(field, parser), setDocsWithField); } @@ -1088,14 +1112,14 @@ class FieldCacheImpl implements FieldCache { return valuesIn; } else { final FieldInfo info = reader.getFieldInfos().fieldInfo(field); - if (info != null && info.hasDocValues()) { + if (info == null) { + return EMPTY_TERMSINDEX; + } else if (info.hasDocValues()) { // we don't try to build a sorted instance from numeric/binary doc // values because dedup can be very costly throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType()); - } - if (info != null && !info.isIndexed()) { - throw new IllegalArgumentException("Cannot get terms index for \"" + field - + "\": it isn't indexed and doesn't have sorted doc values"); + } else if (!info.isIndexed()) { + return EMPTY_TERMSINDEX; } return (SortedDocValues) caches.get(SortedDocValues.class).get(reader, new CacheKey(field, acceptableOverheadRatio), false); } @@ -1248,8 +1272,12 @@ class FieldCacheImpl implements FieldCache { } final FieldInfo info = reader.getFieldInfos().fieldInfo(field); - if (info != null && info.hasDocValues()) { + if (info == null) { + return BinaryDocValues.EMPTY; + } else if (info.hasDocValues()) { throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType()); + } else if (!info.isIndexed()) { + return BinaryDocValues.EMPTY; } return (BinaryDocValues) caches.get(BinaryDocValues.class).get(reader, new CacheKey(field, acceptableOverheadRatio), false); diff --git a/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java b/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java index 61966569c02..1b0ec68b1b5 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java @@ -35,9 +35,15 @@ import org.apache.lucene.document.Field; import org.apache.lucene.document.IntField; import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.document.SortedDocValuesField; +import org.apache.lucene.document.StoredField; import org.apache.lucene.document.StringField; import org.apache.lucene.index.*; +import org.apache.lucene.search.FieldCache.Bytes; +import org.apache.lucene.search.FieldCache.Doubles; +import org.apache.lucene.search.FieldCache.Floats; import org.apache.lucene.search.FieldCache.Ints; +import org.apache.lucene.search.FieldCache.Longs; +import org.apache.lucene.search.FieldCache.Shorts; import org.apache.lucene.store.Directory; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; @@ -507,4 +513,115 @@ public class TestFieldCache extends LuceneTestCase { ir.close(); dir.close(); } + + public void testNonexistantFields() throws Exception { + Directory dir = newDirectory(); + RandomIndexWriter iw = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + iw.addDocument(doc); + DirectoryReader ir = iw.getReader(); + iw.close(); + + AtomicReader ar = getOnlySegmentReader(ir); + + final FieldCache cache = FieldCache.DEFAULT; + cache.purgeAllCaches(); + assertEquals(0, cache.getCacheEntries().length); + + Bytes bytes = cache.getBytes(ar, "bogusbytes", true); + assertEquals(0, bytes.get(0)); + + Shorts shorts = cache.getShorts(ar, "bogusshorts", true); + assertEquals(0, shorts.get(0)); + + Ints ints = cache.getInts(ar, "bogusints", true); + assertEquals(0, ints.get(0)); + + Longs longs = cache.getLongs(ar, "boguslongs", true); + assertEquals(0, longs.get(0)); + + Floats floats = cache.getFloats(ar, "bogusfloats", true); + assertEquals(0, floats.get(0), 0.0f); + + Doubles doubles = cache.getDoubles(ar, "bogusdoubles", true); + assertEquals(0, doubles.get(0), 0.0D); + + BytesRef scratch = new BytesRef(); + BinaryDocValues binaries = cache.getTerms(ar, "bogusterms"); + binaries.get(0, scratch); + assertTrue(scratch.bytes == BinaryDocValues.MISSING); + + SortedDocValues sorted = cache.getTermsIndex(ar, "bogustermsindex"); + assertEquals(-1, sorted.getOrd(0)); + sorted.get(0, scratch); + assertTrue(scratch.bytes == BinaryDocValues.MISSING); + + Bits bits = cache.getDocsWithField(ar, "bogusbits"); + assertFalse(bits.get(0)); + + // check that we cached nothing + assertEquals(0, cache.getCacheEntries().length); + ir.close(); + dir.close(); + } + + public void testNonIndexedFields() throws Exception { + Directory dir = newDirectory(); + RandomIndexWriter iw = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + doc.add(new StoredField("bogusbytes", "bogus")); + doc.add(new StoredField("bogusshorts", "bogus")); + doc.add(new StoredField("bogusints", "bogus")); + doc.add(new StoredField("boguslongs", "bogus")); + doc.add(new StoredField("bogusfloats", "bogus")); + doc.add(new StoredField("bogusdoubles", "bogus")); + doc.add(new StoredField("bogusterms", "bogus")); + doc.add(new StoredField("bogustermsindex", "bogus")); + doc.add(new StoredField("bogusbits", "bogus")); + iw.addDocument(doc); + DirectoryReader ir = iw.getReader(); + iw.close(); + + AtomicReader ar = getOnlySegmentReader(ir); + + final FieldCache cache = FieldCache.DEFAULT; + cache.purgeAllCaches(); + assertEquals(0, cache.getCacheEntries().length); + + Bytes bytes = cache.getBytes(ar, "bogusbytes", true); + assertEquals(0, bytes.get(0)); + + Shorts shorts = cache.getShorts(ar, "bogusshorts", true); + assertEquals(0, shorts.get(0)); + + Ints ints = cache.getInts(ar, "bogusints", true); + assertEquals(0, ints.get(0)); + + Longs longs = cache.getLongs(ar, "boguslongs", true); + assertEquals(0, longs.get(0)); + + Floats floats = cache.getFloats(ar, "bogusfloats", true); + assertEquals(0, floats.get(0), 0.0f); + + Doubles doubles = cache.getDoubles(ar, "bogusdoubles", true); + assertEquals(0, doubles.get(0), 0.0D); + + BytesRef scratch = new BytesRef(); + BinaryDocValues binaries = cache.getTerms(ar, "bogusterms"); + binaries.get(0, scratch); + assertTrue(scratch.bytes == BinaryDocValues.MISSING); + + SortedDocValues sorted = cache.getTermsIndex(ar, "bogustermsindex"); + assertEquals(-1, sorted.getOrd(0)); + sorted.get(0, scratch); + assertTrue(scratch.bytes == BinaryDocValues.MISSING); + + Bits bits = cache.getDocsWithField(ar, "bogusbits"); + assertFalse(bits.get(0)); + + // check that we cached nothing + assertEquals(0, cache.getCacheEntries().length); + ir.close(); + dir.close(); + } } diff --git a/lucene/core/src/test/org/apache/lucene/util/TestFieldCacheSanityChecker.java b/lucene/core/src/test/org/apache/lucene/util/TestFieldCacheSanityChecker.java index 6b479594565..fbfbded4316 100644 --- a/lucene/core/src/test/org/apache/lucene/util/TestFieldCacheSanityChecker.java +++ b/lucene/core/src/test/org/apache/lucene/util/TestFieldCacheSanityChecker.java @@ -143,9 +143,9 @@ public class TestFieldCacheSanityChecker extends LuceneTestCase { FieldCache cache = FieldCache.DEFAULT; cache.purgeAllCaches(); - cache.getTerms(readerA, "theString"); - cache.getTerms(readerB, "theString"); - cache.getTerms(readerX, "theString"); + cache.getTerms(readerA, "theInt"); + cache.getTerms(readerB, "theInt"); + cache.getTerms(readerX, "theInt"); cache.getBytes(readerX, "theByte", false);