diff --git a/CHANGES.txt b/CHANGES.txt index cdbc31b36be..6b0b3553a3f 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -667,7 +667,17 @@ New features 35. LUCENE-1790: Added BoostingFunctionTermQuery to enable scoring of payloads based on the maximum payload seen for a document. - Slight refactoring of Similarity and other payload queries (Grant Ingersoll) + Slight refactoring of Similarity and other payload queries (Grant Ingersoll) + +36. LUCENE-1749: Addition of FieldCacheSanityChecker utility, and + hooks to use it in all existing Lucene Tests. This class can + be used by any application to inspect the FieldCache and provide + diagnostic information about the possibility of inconsistent + FieldCache usage. Namely: FieldCache entries for the same field + with different datatypes or parsers; and FieldCache entries for + the same field in both a reader, and one of its (descendant) sub + readers. + (Chris Hostetter, Mark Miller) Optimizations diff --git a/contrib/remote/src/test/org/apache/lucene/search/TestRemoteSort.java b/contrib/remote/src/test/org/apache/lucene/search/TestRemoteSort.java index 42aa6b9f826..6c4a93ed596 100644 --- a/contrib/remote/src/test/org/apache/lucene/search/TestRemoteSort.java +++ b/contrib/remote/src/test/org/apache/lucene/search/TestRemoteSort.java @@ -244,11 +244,18 @@ public class TestRemoteSort extends LuceneTestCase implements Serializable { assertMatches (multi, queryX, sort, "CAIEG"); sort.setSort (new SortField ("custom", SampleComparable.getComparatorSource(), true)); assertMatches (multi, queryY, sort, "HJDBF"); + + assertSaneFieldCaches(getName() + " ComparatorSource"); + FieldCache.DEFAULT.purgeAllCaches(); + + SortComparator custom = SampleComparable.getComparator(); sort.setSort (new SortField ("custom", custom)); assertMatches (multi, queryX, sort, "CAIEG"); sort.setSort (new SortField ("custom", custom, true)); assertMatches (multi, queryY, sort, "HJDBF"); + + assertSaneFieldCaches(getName() + " Comparator"); + 
FieldCache.DEFAULT.purgeAllCaches(); } // test that the relevancy scores are the same even if @@ -343,12 +350,6 @@ public class TestRemoteSort extends LuceneTestCase implements Serializable { sort.setSort("string", true); assertMatches(multi, queryA, sort, "CBEFGHIAJD"); - sort.setSort(new SortField[] { new SortField ("string", Locale.US) }); - assertMatches(multi, queryA, sort, "DJAIHGFEBC"); - - sort.setSort(new SortField[] { new SortField ("string", Locale.US, true) }); - assertMatches(multi, queryA, sort, "CBEFGHIAJD"); - sort.setSort(new String[] {"int","float"}); assertMatches(multi, queryA, sort, "IDHFGJEABC"); @@ -369,6 +370,21 @@ public class TestRemoteSort extends LuceneTestCase implements Serializable { sort.setSort("string", true); assertMatches(multi, queryF, sort, "IJZ"); + + // up to this point, all of the searches should have "sane" + // FieldCache behavior, and should have reused the cache in several cases + assertSaneFieldCaches(getName() + " Basics"); + // next we'll check an alternate Locale for string, so purge first + FieldCache.DEFAULT.purgeAllCaches(); + + sort.setSort(new SortField[] { new SortField ("string", Locale.US) }); + assertMatches(multi, queryA, sort, "DJAIHGFEBC"); + + sort.setSort(new SortField[] { new SortField ("string", Locale.US, true)}); + assertMatches(multi, queryA, sort, "CBEFGHIAJD"); + + assertSaneFieldCaches(getName() + " Locale.US"); + FieldCache.DEFAULT.purgeAllCaches(); } // make sure the documents returned by the search match the expected list diff --git a/src/java/org/apache/lucene/search/FieldCache.java b/src/java/org/apache/lucene/search/FieldCache.java index 8c6917491d7..81f13d7f7e9 100644 --- a/src/java/org/apache/lucene/search/FieldCache.java +++ b/src/java/org/apache/lucene/search/FieldCache.java @@ -19,12 +19,15 @@ package org.apache.lucene.search; import org.apache.lucene.index.IndexReader; import org.apache.lucene.util.NumericUtils; +import org.apache.lucene.util.RamUsageEstimator; import 
org.apache.lucene.document.NumericField; // for javadocs import org.apache.lucene.analysis.NumericTokenStream; // for javadocs import java.io.IOException; import java.io.Serializable; +import java.text.DecimalFormat; + /** * Expert: Maintains caches of term values. * @@ -32,9 +35,14 @@ import java.io.Serializable; * * @since lucene 1.4 * @version $Id$ + * @see org.apache.lucene.util.FieldCacheSanityChecker */ public interface FieldCache { + public static final class CreationPlaceholder { + Object value; + } + /** Indicator for StringIndex values in the cache. */ // NOTE: the value assigned to this constant must not be // the same as any of those in SortField!! @@ -146,6 +154,9 @@ public interface FieldCache { protected Object readResolve() { return DEFAULT_BYTE_PARSER; } + public String toString() { + return FieldCache.class.getName()+".DEFAULT_BYTE_PARSER"; + } }; /** The default parser for short values, which are encoded by {@link Short#toString(short)} */ @@ -156,6 +167,9 @@ public interface FieldCache { protected Object readResolve() { return DEFAULT_SHORT_PARSER; } + public String toString() { + return FieldCache.class.getName()+".DEFAULT_SHORT_PARSER"; + } }; /** The default parser for int values, which are encoded by {@link Integer#toString(int)} */ @@ -166,6 +180,9 @@ public interface FieldCache { protected Object readResolve() { return DEFAULT_INT_PARSER; } + public String toString() { + return FieldCache.class.getName()+".DEFAULT_INT_PARSER"; + } }; /** The default parser for float values, which are encoded by {@link Float#toString(float)} */ @@ -176,6 +193,9 @@ public interface FieldCache { protected Object readResolve() { return DEFAULT_FLOAT_PARSER; } + public String toString() { + return FieldCache.class.getName()+".DEFAULT_FLOAT_PARSER"; + } }; /** The default parser for long values, which are encoded by {@link Long#toString(long)} */ @@ -186,6 +206,9 @@ public interface FieldCache { protected Object readResolve() { return DEFAULT_LONG_PARSER; } + 
public String toString() { + return FieldCache.class.getName()+".DEFAULT_LONG_PARSER"; + } }; /** The default parser for double values, which are encoded by {@link Double#toString(double)} */ @@ -196,6 +219,9 @@ public interface FieldCache { protected Object readResolve() { return DEFAULT_DOUBLE_PARSER; } + public String toString() { + return FieldCache.class.getName()+".DEFAULT_DOUBLE_PARSER"; + } }; /** @@ -212,6 +238,9 @@ public interface FieldCache { protected Object readResolve() { return NUMERIC_UTILS_INT_PARSER; } + public String toString() { + return FieldCache.class.getName()+".NUMERIC_UTILS_INT_PARSER"; + } }; /** @@ -228,6 +257,9 @@ public interface FieldCache { protected Object readResolve() { return NUMERIC_UTILS_FLOAT_PARSER; } + public String toString() { + return FieldCache.class.getName()+".NUMERIC_UTILS_FLOAT_PARSER"; + } }; /** @@ -244,6 +276,9 @@ public interface FieldCache { protected Object readResolve() { return NUMERIC_UTILS_LONG_PARSER; } + public String toString() { + return FieldCache.class.getName()+".NUMERIC_UTILS_LONG_PARSER"; + } }; /** @@ -260,6 +295,9 @@ public interface FieldCache { protected Object readResolve() { return NUMERIC_UTILS_DOUBLE_PARSER; } + public String toString() { + return FieldCache.class.getName()+".NUMERIC_UTILS_DOUBLE_PARSER"; + } }; /** Checks the internal cache for an appropriate entry, and if none is @@ -477,5 +515,105 @@ public interface FieldCache { */ public Comparable[] getCustom (IndexReader reader, String field, SortComparator comparator) throws IOException; + + /** + * EXPERT: A unique Identifier/Description for each item in the FieldCache. + * Can be useful for logging/debugging. + *
+ * EXPERIMENTAL API: This API is considered extremely advanced + * and experimental. It may be removed or altered w/o warning in future + * releases + * of Lucene. + *
+ */ + public static abstract class CacheEntry { + public abstract Object getReaderKey(); + public abstract String getFieldName(); + public abstract Class getCacheType(); + public abstract Object getCustom(); + public abstract Object getValue(); + private String size = null; + protected final void setEstimatedSize(String size) { + this.size = size; + } + /** + * @see #estimateSize(RamUsageEstimator) + */ + public void estimateSize() { + estimateSize(new RamUsageEstimator(false)); // doesn't check for interned + } + /** + * Computes (and stores) the estimated size of the cache Value + * @see #getEstimatedSize + */ + public void estimateSize(RamUsageEstimator ramCalc) { + long size = ramCalc.estimateRamUsage(getValue()); + setEstimatedSize(RamUsageEstimator.humanReadableUnits + (size, new DecimalFormat("0.#"))); + + } + /** + * The most recently estimated size of the value, null unless + * estimateSize has been called. + */ + public final String getEstimatedSize() { + return size; + } + + + public String toString() { + StringBuffer b = new StringBuffer(); + b.append("'").append(getReaderKey()).append("'=>"); + b.append("'").append(getFieldName()).append("',"); + b.append(getCacheType()).append(",").append(getCustom()); + b.append("=>").append(getValue().getClass().getName()).append("#"); + b.append(System.identityHashCode(getValue())); + + String s = getEstimatedSize(); + if(null != s) { + b.append(" (size =~ ").append(s).append(')'); + } + + return b.toString(); + } + } + + /** + * EXPERT: Generates an array of CacheEntry objects representing all items + * currently in the FieldCache. + *+ * NOTE: These CacheEntry objects maintain a strong reference to the + * Cached Values. Maintaining references to a CacheEntry after the IndexReader + * associated with it has been garbage collected will prevent the Value itself + * from being garbage collected when the Cache drops the WeakReference. + *
+ *+ * EXPERIMENTAL API: This API is considered extremely advanced + * and experimental. It may be removed or altered w/o warning in future + * releases + * of Lucene. + *
+ */ + public abstract CacheEntry[] getCacheEntries(); + + /** + *+ * EXPERT: Instructs the FieldCache to forcibly expunge all entries + * from the underlying caches. This is intended only to be used for + * test methods as a way to ensure a known base state of the Cache + * (without needing to rely on GC to free WeakReferences). + * It should not be relied on for "Cache maintenance" in general + * application code. + *
+ *+ * EXPERIMENTAL API: This API is considered extremely advanced + * and experimental. It may be removed or altered w/o warning in future + * releases + * of Lucene. + *
+ */ + public abstract void purgeAllCaches(); + + } diff --git a/src/java/org/apache/lucene/search/FieldCacheImpl.java b/src/java/org/apache/lucene/search/FieldCacheImpl.java index 8b1e1da4e3b..61fbbab798d 100644 --- a/src/java/org/apache/lucene/search/FieldCacheImpl.java +++ b/src/java/org/apache/lucene/search/FieldCacheImpl.java @@ -17,18 +17,22 @@ package org.apache.lucene.search; * limitations under the License. */ +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.WeakHashMap; + +import org.apache.lucene.document.NumericField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermDocs; import org.apache.lucene.index.TermEnum; import org.apache.lucene.util.StringHelper; -import java.io.IOException; -import java.util.HashMap; -import java.util.Locale; -import java.util.Map; -import java.util.WeakHashMap; - /** * Expert: The default cache implementation, storing all values in memory. * A WeakHashMap is used for storage. 
@@ -41,6 +45,116 @@ import java.util.WeakHashMap; // TODO: change interface to FieldCache in 3.0 when removed class FieldCacheImpl implements ExtendedFieldCache { + private Map caches; + FieldCacheImpl() { + init(); + } + private synchronized void init() { + caches = new HashMap(7); + caches.put(Byte.TYPE, new ByteCache(this)); + caches.put(Short.TYPE, new ShortCache(this)); + caches.put(Integer.TYPE, new IntCache(this)); + caches.put(Float.TYPE, new FloatCache(this)); + caches.put(Long.TYPE, new LongCache(this)); + caches.put(Double.TYPE, new DoubleCache(this)); + caches.put(String.class, new StringCache(this)); + caches.put(StringIndex.class, new StringIndexCache(this)); + caches.put(Comparable.class, new CustomCache(this)); + caches.put(Object.class, new AutoCache(this)); + } + + public void purgeAllCaches() { + init(); + } + + public CacheEntry[] getCacheEntries() { + List result = new ArrayList(17); + Iterator outerKeys = caches.keySet().iterator(); + while (outerKeys.hasNext()) { + Class cacheType = (Class)outerKeys.next(); + Cache cache = (Cache)caches.get(cacheType); + Iterator innerKeys = cache.readerCache.keySet().iterator(); + while (innerKeys.hasNext()) { + // we've now materialized a hard ref + Object readerKey = innerKeys.next(); + // innerKeys was backed by WeakHashMap, sanity check + // that it wasn't GCed before we made hard ref + if (null != readerKey && cache.readerCache.containsKey(readerKey)) { + Map innerCache = ((Map)cache.readerCache.get(readerKey)); + Iterator keys = innerCache.keySet().iterator(); + while (keys.hasNext()) { + Entry entry = (Entry) keys.next(); + result.add(new CacheEntryImpl(readerKey, entry.field, + cacheType, entry.type, + entry.custom, entry.locale, + innerCache.get(entry))); + } + } + } + } + return (CacheEntry[]) result.toArray(new CacheEntry[result.size()]); + } + + private static final class CacheEntryImpl extends CacheEntry { + /** + * @deprecated Only needed because of Entry (ab)use by + * FieldSortedHitQueue, 
remove when FieldSortedHitQueue + * is removed + */ + private final int sortFieldType; + /** + * @deprecated Only needed because of Entry (ab)use by + * FieldSortedHitQueue, remove when FieldSortedHitQueue + * is removed + */ + private final Locale locale; + + private final Object readerKey; + private final String fieldName; + private final Class cacheType; + private final Object custom; + private final Object value; + CacheEntryImpl(Object readerKey, String fieldName, + Class cacheType, int sortFieldType, + Object custom, Locale locale, + Object value) { + this.readerKey = readerKey; + this.fieldName = fieldName; + this.cacheType = cacheType; + this.sortFieldType = sortFieldType; + this.custom = custom; + this.locale = locale; + this.value = value; + + // :HACK: for testing. +// if (null != locale || SortField.CUSTOM != sortFieldType) { +// throw new RuntimeException("Locale/sortFieldType: " + this); +// } + + } + public Object getReaderKey() { return readerKey; } + public String getFieldName() { return fieldName; } + public Class getCacheType() { return cacheType; } + public Object getCustom() { return custom; } + public Object getValue() { return value; } + /** + * Adds warning to super.toString if Local or sortFieldType were specified + * @deprecated Only needed because of Entry (ab)use by + * FieldSortedHitQueue, remove when FieldSortedHitQueue + * is removed + */ + public String toString() { + String r = super.toString(); + if (null != locale) { + r = r + "...!!!Locale:" + locale + "???"; + } + if (SortField.CUSTOM != sortFieldType) { + r = r + "...!!!SortType:" + sortFieldType + "???"; + } + return r; + } + } + /** * Hack: When thrown from a Parser (NUMERIC_UTILS_* ones), this stops * processing terms and returns the current FieldCache @@ -51,16 +165,25 @@ class FieldCacheImpl implements ExtendedFieldCache { /** Expert: Internal cache. 
*/ abstract static class Cache { - private final Map readerCache = new WeakHashMap(); + Cache() { + this.wrapper = null; + } + + Cache(FieldCache wrapper) { + this.wrapper = wrapper; + } + + final FieldCache wrapper; + + final Map readerCache = new WeakHashMap(); - protected abstract Object createValue(IndexReader reader, Object key) + protected abstract Object createValue(IndexReader reader, Entry key) throws IOException; - public Object get(IndexReader reader, Object key) throws IOException { + public Object get(IndexReader reader, Entry key) throws IOException { Map innerCache; Object value; final Object readerKey = reader.getFieldCacheKey(); - synchronized (readerCache) { innerCache = (Map) readerCache.get(readerKey); if (innerCache == null) { @@ -91,18 +214,25 @@ class FieldCacheImpl implements ExtendedFieldCache { } } - static final class CreationPlaceholder { - Object value; - } - /** Expert: Every composite-key in the internal cache is of this type. */ static class Entry { final String field; // which Fieldable + /** + * @deprecated Only (ab)used by FieldSortedHitQueue, + * remove when FieldSortedHitQueue is removed + */ final int type; // which SortField type final Object custom; // which custom comparator or parser + /** + * @deprecated Only (ab)used by FieldSortedHitQueue, + * remove when FieldSortedHitQueue is removed + */ final Locale locale; // the locale we're sorting (if string) - /** Creates one of these objects. */ + /** + * @deprecated Only (ab)used by FieldSortedHitQueue, + * remove when FieldSortedHitQueue is removed + */ Entry (String field, int type, Locale locale) { this.field = StringHelper.intern(field); this.type = type; @@ -118,7 +248,10 @@ class FieldCacheImpl implements ExtendedFieldCache { this.locale = null; } - /** Creates one of these objects for a custom type with parser, needed by FieldSortedHitQueue. 
*/ + /** + * @deprecated Only (ab)used by FieldSortedHitQueue, + * remove when FieldSortedHitQueue is removed + */ Entry (String field, int type, Parser parser) { this.field = StringHelper.intern(field); this.type = type; @@ -157,18 +290,20 @@ class FieldCacheImpl implements ExtendedFieldCache { // inherit javadocs public byte[] getBytes(IndexReader reader, String field, ByteParser parser) throws IOException { - return (byte[]) bytesCache.get(reader, new Entry(field, parser)); + return (byte[]) ((Cache)caches.get(Byte.TYPE)).get(reader, new Entry(field, parser)); } - Cache bytesCache = new Cache() { - - protected Object createValue(IndexReader reader, Object entryKey) + static final class ByteCache extends Cache { + ByteCache(FieldCache wrapper) { + super(wrapper); + } + protected Object createValue(IndexReader reader, Entry entryKey) throws IOException { Entry entry = (Entry) entryKey; String field = entry.field; ByteParser parser = (ByteParser) entry.custom; if (parser == null) { - return getBytes(reader, field, FieldCache.DEFAULT_BYTE_PARSER); + return wrapper.getBytes(reader, field, FieldCache.DEFAULT_BYTE_PARSER); } final byte[] retArray = new byte[reader.maxDoc()]; TermDocs termDocs = reader.termDocs(); @@ -200,18 +335,21 @@ class FieldCacheImpl implements ExtendedFieldCache { // inherit javadocs public short[] getShorts(IndexReader reader, String field, ShortParser parser) throws IOException { - return (short[]) shortsCache.get(reader, new Entry(field, parser)); + return (short[]) ((Cache)caches.get(Short.TYPE)).get(reader, new Entry(field, parser)); } - Cache shortsCache = new Cache() { + static final class ShortCache extends Cache { + ShortCache(FieldCache wrapper) { + super(wrapper); + } - protected Object createValue(IndexReader reader, Object entryKey) + protected Object createValue(IndexReader reader, Entry entryKey) throws IOException { Entry entry = (Entry) entryKey; String field = entry.field; ShortParser parser = (ShortParser) entry.custom; if 
(parser == null) { - return getShorts(reader, field, FieldCache.DEFAULT_SHORT_PARSER); + return wrapper.getShorts(reader, field, FieldCache.DEFAULT_SHORT_PARSER); } final short[] retArray = new short[reader.maxDoc()]; TermDocs termDocs = reader.termDocs(); @@ -243,21 +381,24 @@ class FieldCacheImpl implements ExtendedFieldCache { // inherit javadocs public int[] getInts(IndexReader reader, String field, IntParser parser) throws IOException { - return (int[]) intsCache.get(reader, new Entry(field, parser)); + return (int[]) ((Cache)caches.get(Integer.TYPE)).get(reader, new Entry(field, parser)); } - Cache intsCache = new Cache() { + static final class IntCache extends Cache { + IntCache(FieldCache wrapper) { + super(wrapper); + } - protected Object createValue(IndexReader reader, Object entryKey) + protected Object createValue(IndexReader reader, Entry entryKey) throws IOException { Entry entry = (Entry) entryKey; String field = entry.field; IntParser parser = (IntParser) entry.custom; if (parser == null) { try { - return getInts(reader, field, DEFAULT_INT_PARSER); + return wrapper.getInts(reader, field, DEFAULT_INT_PARSER); } catch (NumberFormatException ne) { - return getInts(reader, field, NUMERIC_UTILS_INT_PARSER); + return wrapper.getInts(reader, field, NUMERIC_UTILS_INT_PARSER); } } int[] retArray = null; @@ -295,24 +436,28 @@ class FieldCacheImpl implements ExtendedFieldCache { // inherit javadocs public float[] getFloats(IndexReader reader, String field, FloatParser parser) - throws IOException { - return (float[]) floatsCache.get(reader, new Entry(field, parser)); + throws IOException { + + return (float[]) ((Cache)caches.get(Float.TYPE)).get(reader, new Entry(field, parser)); } - Cache floatsCache = new Cache() { + static final class FloatCache extends Cache { + FloatCache(FieldCache wrapper) { + super(wrapper); + } - protected Object createValue(IndexReader reader, Object entryKey) + protected Object createValue(IndexReader reader, Entry entryKey) throws 
IOException { Entry entry = (Entry) entryKey; String field = entry.field; FloatParser parser = (FloatParser) entry.custom; if (parser == null) { try { - return getFloats(reader, field, DEFAULT_FLOAT_PARSER); + return wrapper.getFloats(reader, field, DEFAULT_FLOAT_PARSER); } catch (NumberFormatException ne) { - return getFloats(reader, field, NUMERIC_UTILS_FLOAT_PARSER); + return wrapper.getFloats(reader, field, NUMERIC_UTILS_FLOAT_PARSER); } - } + } float[] retArray = null; TermDocs termDocs = reader.termDocs(); TermEnum termEnum = reader.terms (new Term (field)); @@ -347,27 +492,30 @@ class FieldCacheImpl implements ExtendedFieldCache { // inherit javadocs public long[] getLongs(IndexReader reader, String field, FieldCache.LongParser parser) throws IOException { - return (long[]) longsCache.get(reader, new Entry(field, parser)); + return (long[]) ((Cache)caches.get(Long.TYPE)).get(reader, new Entry(field, parser)); } /** @deprecated Will be removed in 3.0, this is for binary compatibility only */ public long[] getLongs(IndexReader reader, String field, ExtendedFieldCache.LongParser parser) throws IOException { - return (long[]) longsCache.get(reader, new Entry(field, parser)); + return (long[]) ((Cache)caches.get(Long.TYPE)).get(reader, new Entry(field, parser)); } - Cache longsCache = new Cache() { + static final class LongCache extends Cache { + LongCache(FieldCache wrapper) { + super(wrapper); + } - protected Object createValue(IndexReader reader, Object entryKey) + protected Object createValue(IndexReader reader, Entry entryKey) throws IOException { Entry entry = (Entry) entryKey; String field = entry.field; FieldCache.LongParser parser = (FieldCache.LongParser) entry.custom; if (parser == null) { try { - return getLongs(reader, field, DEFAULT_LONG_PARSER); + return wrapper.getLongs(reader, field, DEFAULT_LONG_PARSER); } catch (NumberFormatException ne) { - return getLongs(reader, field, NUMERIC_UTILS_LONG_PARSER); + return wrapper.getLongs(reader, field, 
NUMERIC_UTILS_LONG_PARSER); } } long[] retArray = null; @@ -405,27 +553,30 @@ class FieldCacheImpl implements ExtendedFieldCache { // inherit javadocs public double[] getDoubles(IndexReader reader, String field, FieldCache.DoubleParser parser) throws IOException { - return (double[]) doublesCache.get(reader, new Entry(field, parser)); + return (double[]) ((Cache)caches.get(Double.TYPE)).get(reader, new Entry(field, parser)); } /** @deprecated Will be removed in 3.0, this is for binary compatibility only */ public double[] getDoubles(IndexReader reader, String field, ExtendedFieldCache.DoubleParser parser) throws IOException { - return (double[]) doublesCache.get(reader, new Entry(field, parser)); + return (double[]) ((Cache)caches.get(Double.TYPE)).get(reader, new Entry(field, parser)); } - Cache doublesCache = new Cache() { + static final class DoubleCache extends Cache { + DoubleCache(FieldCache wrapper) { + super(wrapper); + } - protected Object createValue(IndexReader reader, Object entryKey) + protected Object createValue(IndexReader reader, Entry entryKey) throws IOException { Entry entry = (Entry) entryKey; String field = entry.field; FieldCache.DoubleParser parser = (FieldCache.DoubleParser) entry.custom; if (parser == null) { try { - return getDoubles(reader, field, DEFAULT_DOUBLE_PARSER); + return wrapper.getDoubles(reader, field, DEFAULT_DOUBLE_PARSER); } catch (NumberFormatException ne) { - return getDoubles(reader, field, NUMERIC_UTILS_DOUBLE_PARSER); + return wrapper.getDoubles(reader, field, NUMERIC_UTILS_DOUBLE_PARSER); } } double[] retArray = null; @@ -457,14 +608,17 @@ class FieldCacheImpl implements ExtendedFieldCache { // inherit javadocs public String[] getStrings(IndexReader reader, String field) throws IOException { - return (String[]) stringsCache.get(reader, field); + return (String[]) ((Cache)caches.get(String.class)).get(reader, new Entry(field, (Parser)null)); } - Cache stringsCache = new Cache() { + static final class StringCache 
extends Cache { + StringCache(FieldCache wrapper) { + super(wrapper); + } - protected Object createValue(IndexReader reader, Object fieldKey) + protected Object createValue(IndexReader reader, Entry entryKey) throws IOException { - String field = StringHelper.intern((String) fieldKey); + String field = StringHelper.intern((String) entryKey.field); final String[] retArray = new String[reader.maxDoc()]; TermDocs termDocs = reader.termDocs(); TermEnum termEnum = reader.terms (new Term (field)); @@ -489,14 +643,17 @@ class FieldCacheImpl implements ExtendedFieldCache { // inherit javadocs public StringIndex getStringIndex(IndexReader reader, String field) throws IOException { - return (StringIndex) stringsIndexCache.get(reader, field); + return (StringIndex) ((Cache)caches.get(StringIndex.class)).get(reader, new Entry(field, (Parser)null)); } - Cache stringsIndexCache = new Cache() { + static final class StringIndexCache extends Cache { + StringIndexCache(FieldCache wrapper) { + super(wrapper); + } - protected Object createValue(IndexReader reader, Object fieldKey) + protected Object createValue(IndexReader reader, Entry entryKey) throws IOException { - String field = StringHelper.intern((String) fieldKey); + String field = StringHelper.intern((String) entryKey.field); final int[] retArray = new int[reader.maxDoc()]; String[] mterms = new String[reader.maxDoc()+1]; TermDocs termDocs = reader.termDocs(); @@ -563,7 +720,7 @@ class FieldCacheImpl implements ExtendedFieldCache { // inherit javadocs public Object getAuto(IndexReader reader, String field) throws IOException { - return autoCache.get(reader, field); + return ((Cache)caches.get(Object.class)).get(reader, new Entry(field, (Parser)null)); } /** @@ -571,11 +728,14 @@ class FieldCacheImpl implements ExtendedFieldCache { * Especially, guessing does not work with the new * {@link NumericField} type. 
*/ - Cache autoCache = new Cache() { + static final class AutoCache extends Cache { + AutoCache(FieldCache wrapper) { + super(wrapper); + } - protected Object createValue(IndexReader reader, Object fieldKey) + protected Object createValue(IndexReader reader, Entry entryKey) throws IOException { - String field = StringHelper.intern((String) fieldKey); + String field = StringHelper.intern((String) entryKey.field); TermEnum enumerator = reader.terms (new Term (field)); try { Term term = enumerator.term(); @@ -588,17 +748,17 @@ class FieldCacheImpl implements ExtendedFieldCache { try { Integer.parseInt (termtext); - ret = getInts (reader, field); + ret = wrapper.getInts (reader, field); } catch (NumberFormatException nfe1) { try { Long.parseLong(termtext); - ret = getLongs (reader, field); + ret = wrapper.getLongs (reader, field); } catch (NumberFormatException nfe2) { try { Float.parseFloat (termtext); - ret = getFloats (reader, field); + ret = wrapper.getFloats (reader, field); } catch (NumberFormatException nfe3) { - ret = getStringIndex (reader, field); + ret = wrapper.getStringIndex (reader, field); } } } @@ -615,13 +775,16 @@ class FieldCacheImpl implements ExtendedFieldCache { /** @deprecated */ public Comparable[] getCustom(IndexReader reader, String field, SortComparator comparator) throws IOException { - return (Comparable[]) customCache.get(reader, new Entry(field, comparator)); + return (Comparable[]) ((Cache)caches.get(Comparable.class)).get(reader, new Entry(field, comparator)); } /** @deprecated */ - Cache customCache = new Cache() { + static final class CustomCache extends Cache { + CustomCache(FieldCache wrapper) { + super(wrapper); + } - protected Object createValue(IndexReader reader, Object entryKey) + protected Object createValue(IndexReader reader, Entry entryKey) throws IOException { Entry entry = (Entry) entryKey; String field = entry.field; diff --git a/src/java/org/apache/lucene/search/FieldSortedHitQueue.java 
b/src/java/org/apache/lucene/search/FieldSortedHitQueue.java index a9c479edfe0..38cd2448602 100644 --- a/src/java/org/apache/lucene/search/FieldSortedHitQueue.java +++ b/src/java/org/apache/lucene/search/FieldSortedHitQueue.java @@ -180,7 +180,7 @@ extends PriorityQueue { * caches comparators instead of term values. */ static final FieldCacheImpl.Cache Comparators = new FieldCacheImpl.Cache() { - protected Object createValue(IndexReader reader, Object entryKey) + protected Object createValue(IndexReader reader, FieldCacheImpl.Entry entryKey) throws IOException { FieldCacheImpl.Entry entry = (FieldCacheImpl.Entry) entryKey; String fieldname = entry.field; diff --git a/src/java/org/apache/lucene/util/AverageGuessMemoryModel.java b/src/java/org/apache/lucene/util/AverageGuessMemoryModel.java new file mode 100644 index 00000000000..f10d7daace7 --- /dev/null +++ b/src/java/org/apache/lucene/util/AverageGuessMemoryModel.java @@ -0,0 +1,74 @@ +package org.apache.lucene.util; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.IdentityHashMap; +import java.util.Map; + +/** + * An average, best guess, MemoryModel that should work okay on most systems. 
+ * + */ +public class AverageGuessMemoryModel extends MemoryModel { + // best guess primitive sizes + private final Map sizes = new IdentityHashMap() { + { + put(boolean.class, new Integer(1)); + put(byte.class, new Integer(1)); + put(char.class, new Integer(2)); + put(short.class, new Integer(2)); + put(int.class, new Integer(4)); + put(float.class, new Integer(4)); + put(double.class, new Integer(8)); + put(long.class, new Integer(8)); + } + }; + + /* + * (non-Javadoc) + * + * @see org.apache.lucene.util.MemoryModel#getArraySize() + */ + public int getArraySize() { + return 16; + } + + /* + * (non-Javadoc) + * + * @see org.apache.lucene.util.MemoryModel#getClassSize() + */ + public int getClassSize() { + return 8; + } + + /* (non-Javadoc) + * @see org.apache.lucene.util.MemoryModel#getPrimitiveSize(java.lang.Class) + */ + public int getPrimitiveSize(Class clazz) { + return ((Integer) sizes.get(clazz)).intValue(); + } + + /* (non-Javadoc) + * @see org.apache.lucene.util.MemoryModel#getReferenceSize() + */ + public int getReferenceSize() { + return 4; + } + +} diff --git a/src/java/org/apache/lucene/util/FieldCacheSanityChecker.java b/src/java/org/apache/lucene/util/FieldCacheSanityChecker.java new file mode 100644 index 00000000000..0ba1f7cdec1 --- /dev/null +++ b/src/java/org/apache/lucene/util/FieldCacheSanityChecker.java @@ -0,0 +1,436 @@ +package org.apache.lucene.util; +/** + * Copyright 2009 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.FieldCache; +import org.apache.lucene.search.FieldCache.CacheEntry; + +/** + * Provides methods for sanity checking that entries in the FieldCache + * are not wasteful or inconsistent. + * + *+ * Lucene 2.9 Introduced numerous enhancements into how the FieldCache + * is used by the low levels of Lucene searching (for Sorting and + * ValueSourceQueries) to improve both the speed for Sorting, as well + * as reopening of IndexReaders. But these changes have shifted the + * usage of FieldCache from "top level" IndexReaders (frequently a + * MultiReader or DirectoryReader) down to the leaf level SegmentReaders. + * As a result, existing applications that directly access the FieldCache + * may find RAM usage increase significantly when upgrading to 2.9 or + * Later. This class provides an API for these applications (or their + * Unit tests) to check at run time if the FieldCache contains "insane" + * usages of the FieldCache. + *
+ * <p>
+ * EXPERIMENTAL API: This API is considered extremely advanced and
+ * experimental. It may be removed or altered w/o warning in future releases
+ * of Lucene.
+ * </p>
+ * @see FieldCache + * @see FieldCacheSanityChecker.Insanity + * @see FieldCacheSanityChecker.InsanityType + */ +public final class FieldCacheSanityChecker { + + private RamUsageEstimator ramCalc = null; + public FieldCacheSanityChecker() { + /* NOOP */ + } + /** + * If set, will be used to estimate size for all CacheEntry objects + * dealt with. + */ + public void setRamUsageEstimator(RamUsageEstimator r) { + ramCalc = r; + } + + + /** + * Quick and dirty convenience method + * @see #check + */ + public static Insanity[] checkSanity(FieldCache cache) { + return checkSanity(cache.getCacheEntries()); + } + + /** + * Quick and dirty convenience method that instantiates an instance with + * "good defaults" and uses it to test the CacheEntry[] + * @see #check + */ + public static Insanity[] checkSanity(CacheEntry[] cacheEntries) { + FieldCacheSanityChecker sanityChecker = new FieldCacheSanityChecker(); + // doesn't check for interned + sanityChecker.setRamUsageEstimator(new RamUsageEstimator(false)); + return sanityChecker.check(cacheEntries); + } + + + /** + * Tests a CacheEntry[] for indication of "insane" cache usage. + *+ * NOTE:FieldCache CreationPlaceholder objects are ignored. + * (:TODO: is this a bad idea? are we masking a real problem?) + *
+ */ + public Insanity[] check(CacheEntry[] cacheEntries) { + if (null == cacheEntries || 0 == cacheEntries.length) + return new Insanity[0]; + + if (null != ramCalc) { + for (int i = 0; i < cacheEntries.length; i++) { + cacheEntries[i].estimateSize(ramCalc); + } + } + + // the indirect mapping lets MapOfSet dedup identical valIds for us + // + // maps the (valId) identityhashCode of cache values to + // sets of CacheEntry instances + final MapOfSets valIdToItems = new MapOfSets(new HashMap(17)); + // maps ReaderField keys to Sets of ValueIds + final MapOfSets readerFieldToValIds = new MapOfSets(new HashMap(17)); + // + + // any keys that we know result in more then one valId + final Set valMismatchKeys = new HashSet(); + + // iterate over all the cacheEntries to get the mappings we'll need + for (int i = 0; i < cacheEntries.length; i++) { + final CacheEntry item = cacheEntries[i]; + final Object val = item.getValue(); + + if (val instanceof FieldCache.CreationPlaceholder) + continue; + + final ReaderField rf = new ReaderField(item.getReaderKey(), + item.getFieldName()); + + final Integer valId = new Integer(System.identityHashCode(val)); + + // indirect mapping, so the MapOfSet will dedup identical valIds for us + valIdToItems.put(valId, item); + if (1 < readerFieldToValIds.put(rf, valId)) { + valMismatchKeys.add(rf); + } + } + + final List insanity = new ArrayList(valMismatchKeys.size() * 3); + + insanity.addAll(checkValueMismatch(valIdToItems, + readerFieldToValIds, + valMismatchKeys)); + insanity.addAll(checkSubreaders(valIdToItems, + readerFieldToValIds)); + + return (Insanity[]) insanity.toArray(new Insanity[insanity.size()]); + } + + /** + * Internal helper method used by check that iterates over + * valMismatchKeys and generates a Collection of Insanity + * instances accordingly. The MapOfSets are used to populate + * the Insantiy objects. 
+ * @see InsanityType#VALUEMISMATCH + */ + private Collection checkValueMismatch(MapOfSets valIdToItems, + MapOfSets readerFieldToValIds, + Set valMismatchKeys) { + + final List insanity = new ArrayList(valMismatchKeys.size() * 3); + + if (! valMismatchKeys.isEmpty() ) { + // we have multiple values for some ReaderFields + + final Map rfMap = readerFieldToValIds.getMap(); + final Map valMap = valIdToItems.getMap(); + final Iterator mismatchIter = valMismatchKeys.iterator(); + while (mismatchIter.hasNext()) { + final ReaderField rf = (ReaderField)mismatchIter.next(); + final List badEntries = new ArrayList(valMismatchKeys.size() * 2); + final Iterator valIter = ((Set)rfMap.get(rf)).iterator(); + while (valIter.hasNext()) { + Iterator entriesIter = ((Set)valMap.get(valIter.next())).iterator(); + while (entriesIter.hasNext()) { + badEntries.add(entriesIter.next()); + } + } + + CacheEntry[] badness = new CacheEntry[badEntries.size()]; + badness = (CacheEntry[]) badEntries.toArray(badness); + + insanity.add(new Insanity(InsanityType.VALUEMISMATCH, + "Multiple distinct value objects for " + + rf.toString(), badness)); + } + } + return insanity; + } + + /** + * Internal helper method used by check that iterates over + * the keys of readerFieldToValIds and generates a Collection + * of Insanity instances whenever two (or more) ReaderField instances are + * found that have an ancestery relationships. 
+ * + * @see InsanityType#SUBREADER + */ + private Collection checkSubreaders(MapOfSets valIdToItems, + MapOfSets readerFieldToValIds) { + + final List insanity = new ArrayList(23); + + Map badChildren = new HashMap(17); + MapOfSets badKids = new MapOfSets(badChildren); // wrapper + + Map viToItemSets = valIdToItems.getMap(); + Map rfToValIdSets = readerFieldToValIds.getMap(); + + Set seen = new HashSet(17); + + Set readerFields = rfToValIdSets.keySet(); + Iterator rfIter = readerFields.iterator(); + while (rfIter.hasNext()) { + ReaderField rf = (ReaderField) rfIter.next(); + + if (seen.contains(rf)) continue; + + List kids = getAllDecendentReaderKeys(rf.readerKey); + for (int i = 0; i < kids.size(); i++) { + ReaderField kid = new ReaderField(kids.get(i), rf.fieldName); + + if (badChildren.containsKey(kid)) { + // we've already process this kid as RF and found other problems + // track those problems as our own + badKids.put(rf, kid); + badKids.putAll(rf, (Collection)badChildren.get(kid)); + badChildren.remove(kid); + + } else if (rfToValIdSets.containsKey(kid)) { + // we have cache entries for the kid + badKids.put(rf, kid); + } + seen.add(kid); + } + seen.add(rf); + } + + // every mapping in badKids represents an Insanity + Iterator parentsIter = badChildren.keySet().iterator(); + while (parentsIter.hasNext()) { + ReaderField parent = (ReaderField) parentsIter.next(); + Set kids = (Set) badChildren.get(parent); + + List badEntries = new ArrayList(kids.size() * 2); + + // put parent entr(ies) in first + { + Iterator valIter =((Set)rfToValIdSets.get(parent)).iterator(); + while (valIter.hasNext()) { + badEntries.addAll((Set)viToItemSets.get(valIter.next())); + } + } + + // now the entries for the descendants + Iterator kidsIter = kids.iterator(); + while (kidsIter.hasNext()) { + ReaderField kid = (ReaderField) kidsIter.next(); + Iterator valIter =((Set)rfToValIdSets.get(kid)).iterator(); + while (valIter.hasNext()) { + 
badEntries.addAll((Set)viToItemSets.get(valIter.next())); + } + } + + CacheEntry[] badness = new CacheEntry[badEntries.size()]; + badness = (CacheEntry[]) badEntries.toArray(badness); + + insanity.add(new Insanity(InsanityType.SUBREADER, + "Found caches for decendents of " + + parent.toString(), + badness)); + } + + return insanity; + + } + + /** + * Checks if the seed is an IndexReader, and if so will walk + * the hierarchy of subReaders building up a list of the objects + * returned by obj.getFieldCacheKey() + */ + private List getAllDecendentReaderKeys(Object seed) { + List all = new ArrayList(17); // will grow as we iter + all.add(seed); + for (int i = 0; i < all.size(); i++) { + Object obj = all.get(i); + if (obj instanceof IndexReader) { + IndexReader[] subs = ((IndexReader)obj).getSequentialSubReaders(); + for (int j = 0; (null != subs) && (j < subs.length); j++) { + all.add(subs[j].getFieldCacheKey()); + } + } + + } + // need to skip the first, because it was the seed + return all.subList(1, all.size()); + } + + /** + * Simple pair object for using "readerKey + fieldName" a Map key + */ + private final static class ReaderField { + public final Object readerKey; + public final String fieldName; + public ReaderField(Object readerKey, String fieldName) { + this.readerKey = readerKey; + this.fieldName = fieldName; + } + public int hashCode() { + return System.identityHashCode(readerKey) * fieldName.hashCode(); + } + public boolean equals(Object that) { + if (! (that instanceof ReaderField)) return false; + + ReaderField other = (ReaderField) that; + return (this.readerKey == other.readerKey && + this.fieldName.equals(other.fieldName)); + } + public String toString() { + return readerKey.toString() + "+" + fieldName; + } + } + + /** + * Simple container for a collection of related CacheEntry objects that + * in conjunction with eachother represent some "insane" usage of the + * FieldCache. 
+ */ + public final static class Insanity { + private final InsanityType type; + private final String msg; + private final CacheEntry[] entries; + public Insanity(InsanityType type, String msg, CacheEntry[] entries) { + if (null == type) { + throw new IllegalArgumentException + ("Insanity requires non-null InsanityType"); + } + if (null == entries || 0 == entries.length) { + throw new IllegalArgumentException + ("Insanity requires non-null/non-empty CacheEntry[]"); + } + this.type = type; + this.msg = msg; + this.entries = entries; + + } + /** + * Type of insane behavior this object represents + */ + public InsanityType getType() { return type; } + /** + * Description of hte insane behavior + */ + public String getMsg() { return msg; } + /** + * CacheEntry objects which suggest a problem + */ + public CacheEntry[] getCacheEntries() { return entries; } + /** + * Multi-Line representation of this Insanity object, starting with + * the Type and Msg, followed by each CacheEntry.toString() on it's + * own line prefaced by a tab character + */ + public String toString() { + StringBuffer buf = new StringBuffer(); + buf.append(getType()).append(": "); + + String m = getMsg(); + if (null != m) buf.append(m); + + buf.append('\n'); + + CacheEntry[] ce = getCacheEntries(); + for (int i = 0; i < ce.length; i++) { + buf.append('\t').append(ce[i].toString()).append('\n'); + } + + return buf.toString(); + } + } + + /** + * An Enumaration of the differnet types of "insane" behavior that + * may be detected in a FieldCache. + * + * @see InsanityType#SUBREADER + * @see InsanityType#VALUEMISMATCH + * @see InsanityType#EXPECTED + */ + public final static class InsanityType { + private final String label; + private InsanityType(final String label) { + this.label = label; + } + public String toString() { return label; } + + /** + * Indicates an overlap in cache usage on a given field + * in sub/super readers. 
+ */ + public final static InsanityType SUBREADER + = new InsanityType("SUBREADER"); + + /** + *+ * Indicates entries have the same reader+fieldname but + * different cached values. This can happen if different datatypes, + * or parsers are used -- and while it's not necessarily a bug + * it's typically an indication of a possible problem. + *
+ *
+ *
- * <code>tearDown()</code> in your unit test, make sure you
- * call <code>super.setUp()</code> and
- * <code>super.tearDown()</code>.
- */
+import junit.framework.TestCase;
+import org.apache.lucene.index.ConcurrentMergeScheduler;
+import org.apache.lucene.search.FieldCache;
+import org.apache.lucene.search.FieldCache.CacheEntry;
+import org.apache.lucene.util.FieldCacheSanityChecker.Insanity;
+
+/**
+ * Base class for all Lucene unit tests.
+ * <p>
+ * Currently the
+ * only added functionality over JUnit's TestCase is
+ * asserting that no unhandled exceptions occurred in
+ * threads launched by ConcurrentMergeScheduler and asserting sane
+ * FieldCache usage at the moment of tearDown.
+ * </p>
+ *
+ * If you
+ * override either <code>setUp()</code> or
+ * <code>tearDown()</code> in your unit test, make sure you
+ * call <code>super.setUp()</code> and
+ * <code>super.tearDown()</code>
+ *
+ * This method will be called by tearDown to clean up FieldCache.DEFAULT. + * If a (poorly written) test has some expectation that the FieldCache + * will persist across test methods (ie: a static IndexReader) this + * method can be overridden to do nothing. + *
+ * @see FieldCache#purgeAllCaches() + */ + protected void purgeFieldCache(final FieldCache fc) { + fc.purgeAllCaches(); + } + + protected String getTestLabel() { + return getClass().getName() + "." + getName(); + } + protected void tearDown() throws Exception { - if (ConcurrentMergeScheduler.anyUnhandledExceptions()) { - // Clear the failure so that we don't just keep - // failing subsequent test cases - ConcurrentMergeScheduler.clearUnhandledExceptions(); - fail("ConcurrentMergeScheduler hit unhandled exceptions"); + try { + // this isn't as useful as calling directly from the scope where the + // index readers are used, because they could be gc'ed just before + // tearDown is called. + // But it's better then nothing. + assertSaneFieldCaches(getTestLabel()); + + if (ConcurrentMergeScheduler.anyUnhandledExceptions()) { + // Clear the failure so that we don't just keep + // failing subsequent test cases + ConcurrentMergeScheduler.clearUnhandledExceptions(); + fail("ConcurrentMergeScheduler hit unhandled exceptions"); + } + } finally { + purgeFieldCache(FieldCache.DEFAULT); } } + + /** + * Asserts that FieldCacheSanityChecker does not detect any + * problems with FieldCache.DEFAULT. + *+ * If any problems are found, they are logged to System.err + * (allong with the msg) when the Assertion is thrown. + *
+ *+ * This method is called by tearDown after every test method, + * however IndexReaders scoped inside test methods may be garbage + * collected prior to this method being called, causing errors to + * be overlooked. Tests are encouraged to keep their IndexReaders + * scoped at the class level, or to explicitly call this method + * directly in the same scope as the IndexReader. + *
+ * @see FieldCacheSanityChecker + */ + protected void assertSaneFieldCaches(final String msg) { + final CacheEntry[] entries = FieldCache.DEFAULT.getCacheEntries(); + Insanity[] insanity = null; + try { + try { + insanity = FieldCacheSanityChecker.checkSanity(entries); + } catch (RuntimeException e) { + dumpArray(msg+ ": FieldCache", entries, System.err); + throw e; + } + + assertEquals(msg + ": Insane FieldCache usage(s) found", + 0, insanity.length); + insanity = null; + } finally { + + // report this in the event of any exception/failure + // if no failure, then insanity will be null anyway + if (null != insanity) { + dumpArray(msg + ": Insane FieldCache usage(s)", insanity, System.err); + } + + } + } + + /** + * Convinience method for logging an iterator. + * @param label String logged before/after the items in the iterator + * @param iter Each next() is toString()ed and logged on it's own line. If iter is null this is logged differnetly then an empty iterator. + * @param stream Stream to log messages to. + */ + public static void dumpIterator(String label, Iterator iter, + PrintStream stream) { + stream.println("*** BEGIN "+label+" ***"); + if (null == iter) { + stream.println(" ... NULL ..."); + } else { + while (iter.hasNext()) { + stream.println(iter.next().toString()); + } + } + stream.println("*** END "+label+" ***"); + } + + /** + * Convinience method for logging an array. Wraps the array in an iterator and delegates + * @see dumpIterator(String,Iterator,PrintStream) + */ + public static void dumpArray(String label, Object[] objs, + PrintStream stream) { + Iterator iter = (null == objs) ? null : Arrays.asList(objs).iterator(); + dumpIterator(label, iter, stream); + } /** * Returns a {@link Random} instance for generating random numbers during the test. 
diff --git a/src/test/org/apache/lucene/util/TestFieldCacheSanityChecker.java b/src/test/org/apache/lucene/util/TestFieldCacheSanityChecker.java new file mode 100644 index 00000000000..ca1dd3ae104 --- /dev/null +++ b/src/test/org/apache/lucene/util/TestFieldCacheSanityChecker.java @@ -0,0 +1,181 @@ +package org.apache.lucene.util; + +/** + * Copyright 2009 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.analysis.WhitespaceAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.search.FieldCache; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.MultiReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.FieldCacheSanityChecker.Insanity; +import org.apache.lucene.util.FieldCacheSanityChecker.InsanityType; + +import java.io.IOException; + +public class TestFieldCacheSanityChecker extends LuceneTestCase { + + protected IndexReader readerA; + protected IndexReader readerB; + protected IndexReader readerX; + + private static final int NUM_DOCS = 1000; + + protected void setUp() throws Exception { + super.setUp(); + + RAMDirectory dirA = new RAMDirectory(); + RAMDirectory dirB = new RAMDirectory(); + + IndexWriter wA = new IndexWriter(dirA, new WhitespaceAnalyzer(), true, + 
IndexWriter.MaxFieldLength.LIMITED); + IndexWriter wB = new IndexWriter(dirB, new WhitespaceAnalyzer(), true, + IndexWriter.MaxFieldLength.LIMITED); + + long theLong = Long.MAX_VALUE; + double theDouble = Double.MAX_VALUE; + byte theByte = Byte.MAX_VALUE; + short theShort = Short.MAX_VALUE; + int theInt = Integer.MAX_VALUE; + float theFloat = Float.MAX_VALUE; + for (int i = 0; i < NUM_DOCS; i++){ + Document doc = new Document(); + doc.add(new Field("theLong", String.valueOf(theLong--), Field.Store.NO, Field.Index.NOT_ANALYZED)); + doc.add(new Field("theDouble", String.valueOf(theDouble--), Field.Store.NO, Field.Index.NOT_ANALYZED)); + doc.add(new Field("theByte", String.valueOf(theByte--), Field.Store.NO, Field.Index.NOT_ANALYZED)); + doc.add(new Field("theShort", String.valueOf(theShort--), Field.Store.NO, Field.Index.NOT_ANALYZED)); + doc.add(new Field("theInt", String.valueOf(theInt--), Field.Store.NO, Field.Index.NOT_ANALYZED)); + doc.add(new Field("theFloat", String.valueOf(theFloat--), Field.Store.NO, Field.Index.NOT_ANALYZED)); + if (0 == i % 3) { + wA.addDocument(doc); + } else { + wB.addDocument(doc); + } + } + wA.close(); + wB.close(); + readerA = IndexReader.open(dirA); + readerB = IndexReader.open(dirB); + readerX = new MultiReader(new IndexReader[] { readerA, readerB }); + } + + public void tearDown() throws Exception { + super.tearDown(); + readerA.close(); + readerB.close(); + readerX.close(); + } + + public void testSanity() throws IOException { + FieldCache cache = FieldCache.DEFAULT; + cache.purgeAllCaches(); + + double [] doubles; + int [] ints; + + doubles = cache.getDoubles(readerA, "theDouble"); + doubles = cache.getDoubles(readerA, "theDouble", + FieldCache.DEFAULT_DOUBLE_PARSER); + doubles = cache.getDoubles(readerB, "theDouble", + FieldCache.DEFAULT_DOUBLE_PARSER); + + ints = cache.getInts(readerX, "theInt"); + ints = cache.getInts(readerX, "theInt", + FieldCache.DEFAULT_INT_PARSER); + + // // // + + Insanity[] insanity = + 
FieldCacheSanityChecker.checkSanity(cache.getCacheEntries()); + + if (0 < insanity.length) + dumpArray(getTestLabel() + " INSANITY", insanity, System.err); + + assertEquals("shouldn't be any cache insanity", 0, insanity.length); + cache.purgeAllCaches(); + } + + public void testInsanity1() throws IOException { + FieldCache cache = FieldCache.DEFAULT; + cache.purgeAllCaches(); + + int [] ints; + String [] strings; + byte [] bytes; + + ints = cache.getInts(readerX, "theInt", FieldCache.DEFAULT_INT_PARSER); + strings = cache.getStrings(readerX, "theInt"); + + // this one is ok + bytes = cache.getBytes(readerX, "theByte"); + + // // // + + Insanity[] insanity = + FieldCacheSanityChecker.checkSanity(cache.getCacheEntries()); + + assertEquals("wrong number of cache errors", 1, insanity.length); + assertEquals("wrong type of cache error", + InsanityType.VALUEMISMATCH, + insanity[0].getType()); + assertEquals("wrong number of entries in cache error", 2, + insanity[0].getCacheEntries().length); + + // we expect bad things, don't let tearDown complain about them + cache.purgeAllCaches(); + } + + public void testInsanity2() throws IOException { + FieldCache cache = FieldCache.DEFAULT; + cache.purgeAllCaches(); + + String [] strings; + byte [] bytes; + + strings = cache.getStrings(readerA, "theString"); + strings = cache.getStrings(readerB, "theString"); + strings = cache.getStrings(readerX, "theString"); + + // this one is ok + bytes = cache.getBytes(readerX, "theByte"); + + + // // // + + Insanity[] insanity = + FieldCacheSanityChecker.checkSanity(cache.getCacheEntries()); + + assertEquals("wrong number of cache errors", 1, insanity.length); + assertEquals("wrong type of cache error", + InsanityType.SUBREADER, + insanity[0].getType()); + assertEquals("wrong number of entries in cache error", 3, + insanity[0].getCacheEntries().length); + + // we expect bad things, don't let tearDown complain about them + cache.purgeAllCaches(); + } + + public void testInsanity3() throws 
IOException { + + // :TODO: subreader tree walking is really hairy ... add more crazy tests. + } + +} diff --git a/src/test/org/apache/lucene/util/TestRamUsageEstimator.java b/src/test/org/apache/lucene/util/TestRamUsageEstimator.java new file mode 100644 index 00000000000..124ee6852d1 --- /dev/null +++ b/src/test/org/apache/lucene/util/TestRamUsageEstimator.java @@ -0,0 +1,57 @@ +package org.apache.lucene.util; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import junit.framework.TestCase; + +public class TestRamUsageEstimator extends TestCase { + + public void testBasic() { + String string = new String("test str"); + RamUsageEstimator rue = new RamUsageEstimator(); + long size = rue.estimateRamUsage(string); + System.out.println("size:" + size); + + string = new String("test strin"); + size = rue.estimateRamUsage(string); + System.out.println("size:" + size); + + Holder holder = new Holder(); + holder.holder = new Holder("string2", 5000L); + size = rue.estimateRamUsage(holder); + System.out.println("size:" + size); + + String[] strings = new String[]{new String("test strin"), new String("hollow"), new String("catchmaster")}; + size = rue.estimateRamUsage(strings); + System.out.println("size:" + size); + } + + private static final class Holder { + long field1 = 5000L; + String name = "name"; + Holder holder; + + Holder() { + } + + Holder(String name, long field1) { + this.name = name; + this.field1 = field1; + } + } +}