diff --git a/CHANGES.txt b/CHANGES.txt index cdbc31b36be..6b0b3553a3f 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -667,7 +667,17 @@ New features 35. LUCENE-1790: Added BoostingFunctionTermQuery to enable scoring of payloads based on the maximum payload seen for a document. - Slight refactoring of Similarity and other payload queries (Grant Ingersoll) + Slight refactoring of Similarity and other payload queries (Grant Ingersoll) + +36. LUCENE-1749: Addition of FieldCacheSanityChecker utility, and + hooks to use it in all existing Lucene tests. This class can + be used by any application to inspect the FieldCache and provide + diagnostic information about the possibility of inconsistent + FieldCache usage. Namely: FieldCache entries for the same field + with different datatypes or parsers; and FieldCache entries for + the same field in both a reader and one of its (descendant) sub + readers. + (Chris Hostetter, Mark Miller) Optimizations diff --git a/contrib/remote/src/test/org/apache/lucene/search/TestRemoteSort.java b/contrib/remote/src/test/org/apache/lucene/search/TestRemoteSort.java index 42aa6b9f826..6c4a93ed596 100644 --- a/contrib/remote/src/test/org/apache/lucene/search/TestRemoteSort.java +++ b/contrib/remote/src/test/org/apache/lucene/search/TestRemoteSort.java @@ -244,11 +244,18 @@ public class TestRemoteSort extends LuceneTestCase implements Serializable { assertMatches (multi, queryX, sort, "CAIEG"); sort.setSort (new SortField ("custom", SampleComparable.getComparatorSource(), true)); assertMatches (multi, queryY, sort, "HJDBF"); + + assertSaneFieldCaches(getName() + " ComparatorSource"); + FieldCache.DEFAULT.purgeAllCaches(); + SortComparator custom = SampleComparable.getComparator(); sort.setSort (new SortField ("custom", custom)); assertMatches (multi, queryX, sort, "CAIEG"); sort.setSort (new SortField ("custom", custom, true)); assertMatches (multi, queryY, sort, "HJDBF"); + + assertSaneFieldCaches(getName() + " Comparator"); + FieldCache.DEFAULT.purgeAllCaches(); } // test that the relevancy scores are the same even if @@ -343,12 +350,6 @@ public class TestRemoteSort extends LuceneTestCase implements Serializable { sort.setSort("string", true); assertMatches(multi, queryA, sort, "CBEFGHIAJD"); - sort.setSort(new SortField[] { new SortField ("string", Locale.US) }); - assertMatches(multi, queryA, sort, "DJAIHGFEBC"); - - sort.setSort(new SortField[] { new SortField ("string", Locale.US, true) }); - assertMatches(multi, queryA, sort, "CBEFGHIAJD"); - sort.setSort(new String[] {"int","float"}); assertMatches(multi, queryA, sort, "IDHFGJEABC"); @@ -369,6 +370,21 @@ public class TestRemoteSort extends LuceneTestCase implements Serializable { sort.setSort("string", true); assertMatches(multi, queryF, sort, "IJZ"); + + // up to this point, all of the searches should have "sane" + // FieldCache behavior, and should have reused the cache in several cases + assertSaneFieldCaches(getName() + " Basics"); + // next we'll check an alternate Locale for string, so purge first + FieldCache.DEFAULT.purgeAllCaches(); + + sort.setSort(new SortField[] { new SortField ("string", Locale.US) }); + assertMatches(multi, queryA, sort, "DJAIHGFEBC"); + + sort.setSort(new SortField[] { new SortField ("string", Locale.US, true)}); + assertMatches(multi, queryA, sort, "CBEFGHIAJD"); + + assertSaneFieldCaches(getName() + " Locale.US"); + FieldCache.DEFAULT.purgeAllCaches(); } // make sure the documents returned by the search match the expected list diff --git 
a/src/java/org/apache/lucene/search/FieldCache.java b/src/java/org/apache/lucene/search/FieldCache.java index 8c6917491d7..81f13d7f7e9 100644 --- a/src/java/org/apache/lucene/search/FieldCache.java +++ b/src/java/org/apache/lucene/search/FieldCache.java @@ -19,12 +19,15 @@ package org.apache.lucene.search; import org.apache.lucene.index.IndexReader; import org.apache.lucene.util.NumericUtils; +import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.document.NumericField; // for javadocs import org.apache.lucene.analysis.NumericTokenStream; // for javadocs import java.io.IOException; import java.io.Serializable; +import java.text.DecimalFormat; + /** * Expert: Maintains caches of term values. * @@ -32,9 +35,14 @@ import java.io.Serializable; * * @since lucene 1.4 * @version $Id$ + * @see org.apache.lucene.util.FieldCacheSanityChecker */ public interface FieldCache { + public static final class CreationPlaceholder { + Object value; + } + /** Indicator for StringIndex values in the cache. */ // NOTE: the value assigned to this constant must not be // the same as any of those in SortField!! @@ -146,6 +154,9 @@ public interface FieldCache { protected Object readResolve() { return DEFAULT_BYTE_PARSER; } + public String toString() { + return FieldCache.class.getName()+".DEFAULT_BYTE_PARSER"; + } }; /** The default parser for short values, which are encoded by {@link Short#toString(short)} */ @@ -156,6 +167,9 @@ public interface FieldCache { protected Object readResolve() { return DEFAULT_SHORT_PARSER; } + public String toString() { + return FieldCache.class.getName()+".DEFAULT_SHORT_PARSER"; + } }; /** The default parser for int values, which are encoded by {@link Integer#toString(int)} */ @@ -166,6 +180,9 @@ public interface FieldCache { protected Object readResolve() { return DEFAULT_INT_PARSER; } + public String toString() { + return FieldCache.class.getName()+".DEFAULT_INT_PARSER"; + } }; /** The default parser for float values, which are encoded by {@link Float#toString(float)} */ @@ -176,6 +193,9 @@ public interface FieldCache { protected Object readResolve() { return DEFAULT_FLOAT_PARSER; } + public String toString() { + return FieldCache.class.getName()+".DEFAULT_FLOAT_PARSER"; + } }; /** The default parser for long values, which are encoded by {@link Long#toString(long)} */ @@ -186,6 +206,9 @@ public interface FieldCache { protected Object readResolve() { return DEFAULT_LONG_PARSER; } + public String toString() { + return FieldCache.class.getName()+".DEFAULT_LONG_PARSER"; + } }; /** The default parser for double values, which are encoded by {@link Double#toString(double)} */ @@ -196,6 +219,9 @@ public interface FieldCache { protected Object readResolve() { return DEFAULT_DOUBLE_PARSER; } + public String toString() { + return FieldCache.class.getName()+".DEFAULT_DOUBLE_PARSER"; + } }; /** @@ -212,6 +238,9 @@ public interface FieldCache { protected Object readResolve() { return NUMERIC_UTILS_INT_PARSER; } + public String toString() { + return FieldCache.class.getName()+".NUMERIC_UTILS_INT_PARSER"; + } }; /** @@ -228,6 +257,9 @@ public interface FieldCache { protected Object readResolve() { return NUMERIC_UTILS_FLOAT_PARSER; } + public String toString() { + return FieldCache.class.getName()+".NUMERIC_UTILS_FLOAT_PARSER"; + } }; /** @@ -244,6 +276,9 @@ public interface FieldCache { protected Object readResolve() { return NUMERIC_UTILS_LONG_PARSER; } + public String toString() { + return FieldCache.class.getName()+".NUMERIC_UTILS_LONG_PARSER"; + } }; /** @@ -260,6 +295,9 @@ 
public interface FieldCache { protected Object readResolve() { return NUMERIC_UTILS_DOUBLE_PARSER; } + public String toString() { + return FieldCache.class.getName()+".NUMERIC_UTILS_DOUBLE_PARSER"; + } }; /** Checks the internal cache for an appropriate entry, and if none is @@ -477,5 +515,105 @@ public interface FieldCache { */ public Comparable[] getCustom (IndexReader reader, String field, SortComparator comparator) throws IOException; + + /** + * EXPERT: A unique Identifier/Description for each item in the FieldCache. + * Can be useful for logging/debugging. + *

+ * EXPERIMENTAL API: This API is considered extremely advanced + * and experimental. It may be removed or altered without warning in + * future releases of Lucene. + *
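+ * Example of the kind of diagnostic line produced by toString (all + * values here are hypothetical): + * <pre> + *   'SegmentReader@7fbe847c'=>'title',class java.lang.String,null=>[Ljava.lang.String;#1935465 (size =~ 1.2 MB) + * </pre>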

+ */ + public static abstract class CacheEntry { + public abstract Object getReaderKey(); + public abstract String getFieldName(); + public abstract Class getCacheType(); + public abstract Object getCustom(); + public abstract Object getValue(); + private String size = null; + protected final void setEstimatedSize(String size) { + this.size = size; + } + /** + * @see #estimateSize(RamUsageEstimator) + */ + public void estimateSize() { + estimateSize(new RamUsageEstimator(false)); // doesn't check for interned + } + /** + * Computes (and stores) the estimated size of the cache Value + * @see #getEstimatedSize + */ + public void estimateSize(RamUsageEstimator ramCalc) { + long size = ramCalc.estimateRamUsage(getValue()); + setEstimatedSize(RamUsageEstimator.humanReadableUnits + (size, new DecimalFormat("0.#"))); + + } + /** + * The most recently estimated size of the value, null unless + * estimateSize has been called. + */ + public final String getEstimatedSize() { + return size; + } + + + public String toString() { + StringBuffer b = new StringBuffer(); + b.append("'").append(getReaderKey()).append("'=>"); + b.append("'").append(getFieldName()).append("',"); + b.append(getCacheType()).append(",").append(getCustom()); + b.append("=>").append(getValue().getClass().getName()).append("#"); + b.append(System.identityHashCode(getValue())); + + String s = getEstimatedSize(); + if(null != s) { + b.append(" (size =~ ").append(s).append(')'); + } + + return b.toString(); + } + } + + /** + * EXPERT: Generates an array of CacheEntry objects representing all items + * currently in the FieldCache. + *

+ * NOTE: These CacheEntry objects maintain a strong reference to the + * Cached Values. Maintaining references to a CacheEntry after the + * IndexReader associated with it has been garbage collected will prevent + * the Value itself from being garbage collected when the Cache drops its + * WeakReference. + *
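+ * Example usage for diagnostics (a sketch; how entries are reported is + * up to the application): + * <pre> + *   FieldCache.CacheEntry[] entries = FieldCache.DEFAULT.getCacheEntries(); + *   for (int i = 0; i &lt; entries.length; i++) { + *     entries[i].estimateSize(); // optional: compute a RAM estimate + *     System.err.println(entries[i].toString()); + *   } + * </pre>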

+ *

+ * EXPERIMENTAL API: This API is considered extremely advanced + * and experimental. It may be removed or altered without warning in + * future releases of Lucene. + *

+ */ + public abstract CacheEntry[] getCacheEntries(); + + /** + *

+ * EXPERT: Instructs the FieldCache to forcibly expunge all entries + * from the underlying caches. This is intended only to be used for + * test methods as a way to ensure a known base state of the Cache + * (without needing to rely on GC to free WeakReferences). + * It should not be relied on for "Cache maintenance" in general + * application code. + *
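+ * For example, a test might reset to a known-empty state between checks + * (a sketch mirroring the test hooks added in this patch): + * <pre> + *   assertSaneFieldCaches(getName()); // test hook used in TestRemoteSort above + *   FieldCache.DEFAULT.purgeAllCaches(); // start the next check from scratch + * </pre>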

+ *

+ * EXPERIMENTAL API: This API is considered extremely advanced + * and experimental. It may be removed or altered without warning in + * future releases of Lucene. + *

+ */ + public abstract void purgeAllCaches(); + + } diff --git a/src/java/org/apache/lucene/search/FieldCacheImpl.java b/src/java/org/apache/lucene/search/FieldCacheImpl.java index 8b1e1da4e3b..61fbbab798d 100644 --- a/src/java/org/apache/lucene/search/FieldCacheImpl.java +++ b/src/java/org/apache/lucene/search/FieldCacheImpl.java @@ -17,18 +17,22 @@ package org.apache.lucene.search; * limitations under the License. */ +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.WeakHashMap; + +import org.apache.lucene.document.NumericField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermDocs; import org.apache.lucene.index.TermEnum; import org.apache.lucene.util.StringHelper; -import java.io.IOException; -import java.util.HashMap; -import java.util.Locale; -import java.util.Map; -import java.util.WeakHashMap; - /** * Expert: The default cache implementation, storing all values in memory. * A WeakHashMap is used for storage. @@ -41,6 +45,116 @@ import java.util.WeakHashMap; // TODO: change interface to FieldCache in 3.0 when removed class FieldCacheImpl implements ExtendedFieldCache { + private Map caches; + FieldCacheImpl() { + init(); + } + private synchronized void init() { + caches = new HashMap(7); + caches.put(Byte.TYPE, new ByteCache(this)); + caches.put(Short.TYPE, new ShortCache(this)); + caches.put(Integer.TYPE, new IntCache(this)); + caches.put(Float.TYPE, new FloatCache(this)); + caches.put(Long.TYPE, new LongCache(this)); + caches.put(Double.TYPE, new DoubleCache(this)); + caches.put(String.class, new StringCache(this)); + caches.put(StringIndex.class, new StringIndexCache(this)); + caches.put(Comparable.class, new CustomCache(this)); + caches.put(Object.class, new AutoCache(this)); + } + + public void purgeAllCaches() { + init(); + } + + public CacheEntry[] getCacheEntries() { + List result = new ArrayList(17); + Iterator outerKeys = caches.keySet().iterator(); + while (outerKeys.hasNext()) { + Class cacheType = (Class)outerKeys.next(); + Cache cache = (Cache)caches.get(cacheType); + Iterator innerKeys = cache.readerCache.keySet().iterator(); + while (innerKeys.hasNext()) { + // we've now materialized a hard ref + Object readerKey = innerKeys.next(); + // innerKeys was backed by WeakHashMap, sanity check + // that it wasn't GCed before we made hard ref + if (null != readerKey && cache.readerCache.containsKey(readerKey)) { + Map innerCache = ((Map)cache.readerCache.get(readerKey)); + Iterator keys = innerCache.keySet().iterator(); + while (keys.hasNext()) { + Entry entry = (Entry) keys.next(); + result.add(new CacheEntryImpl(readerKey, entry.field, + cacheType, entry.type, + entry.custom, entry.locale, + innerCache.get(entry))); + } + } + } + } + return (CacheEntry[]) result.toArray(new CacheEntry[result.size()]); + } + + private static final class CacheEntryImpl extends CacheEntry { + /** + * @deprecated Only needed because of Entry (ab)use by + * FieldSortedHitQueue, remove when FieldSortedHitQueue + * is removed + */ + private final int sortFieldType; + /** + * @deprecated Only needed because of Entry (ab)use by + * FieldSortedHitQueue, remove when FieldSortedHitQueue + * is removed + */ + private final Locale locale; + + private final Object readerKey; + private final String fieldName; + private final Class cacheType; + private final Object custom; + private final 
Object value; + CacheEntryImpl(Object readerKey, String fieldName, + Class cacheType, int sortFieldType, + Object custom, Locale locale, + Object value) { + this.readerKey = readerKey; + this.fieldName = fieldName; + this.cacheType = cacheType; + this.sortFieldType = sortFieldType; + this.custom = custom; + this.locale = locale; + this.value = value; + + // :HACK: for testing. +// if (null != locale || SortField.CUSTOM != sortFieldType) { +// throw new RuntimeException("Locale/sortFieldType: " + this); +// } + + } + public Object getReaderKey() { return readerKey; } + public String getFieldName() { return fieldName; } + public Class getCacheType() { return cacheType; } + public Object getCustom() { return custom; } + public Object getValue() { return value; } + /** + * Adds warning to super.toString if Local or sortFieldType were specified + * @deprecated Only needed because of Entry (ab)use by + * FieldSortedHitQueue, remove when FieldSortedHitQueue + * is removed + */ + public String toString() { + String r = super.toString(); + if (null != locale) { + r = r + "...!!!Locale:" + locale + "???"; + } + if (SortField.CUSTOM != sortFieldType) { + r = r + "...!!!SortType:" + sortFieldType + "???"; + } + return r; + } + } + /** * Hack: When thrown from a Parser (NUMERIC_UTILS_* ones), this stops * processing terms and returns the current FieldCache @@ -51,16 +165,25 @@ class FieldCacheImpl implements ExtendedFieldCache { /** Expert: Internal cache. */ abstract static class Cache { - private final Map readerCache = new WeakHashMap(); + Cache() { + this.wrapper = null; + } + + Cache(FieldCache wrapper) { + this.wrapper = wrapper; + } + + final FieldCache wrapper; + + final Map readerCache = new WeakHashMap(); - protected abstract Object createValue(IndexReader reader, Object key) + protected abstract Object createValue(IndexReader reader, Entry key) throws IOException; - public Object get(IndexReader reader, Object key) throws IOException { + public Object get(IndexReader reader, Entry key) throws IOException { Map innerCache; Object value; final Object readerKey = reader.getFieldCacheKey(); - synchronized (readerCache) { innerCache = (Map) readerCache.get(readerKey); if (innerCache == null) { @@ -91,18 +214,25 @@ class FieldCacheImpl implements ExtendedFieldCache { } } - static final class CreationPlaceholder { - Object value; - } - /** Expert: Every composite-key in the internal cache is of this type. */ static class Entry { final String field; // which Fieldable + /** + * @deprecated Only (ab)used by FieldSortedHitQueue, + * remove when FieldSortedHitQueue is removed + */ final int type; // which SortField type final Object custom; // which custom comparator or parser + /** + * @deprecated Only (ab)used by FieldSortedHitQueue, + * remove when FieldSortedHitQueue is removed + */ final Locale locale; // the locale we're sorting (if string) - /** Creates one of these objects. */ + /** + * @deprecated Only (ab)used by FieldSortedHitQueue, + * remove when FieldSortedHitQueue is removed + */ Entry (String field, int type, Locale locale) { this.field = StringHelper.intern(field); this.type = type; @@ -118,7 +248,10 @@ class FieldCacheImpl implements ExtendedFieldCache { this.locale = null; } - /** Creates one of these objects for a custom type with parser, needed by FieldSortedHitQueue. 
*/ + /** + * @deprecated Only (ab)used by FieldSortedHitQueue, + * remove when FieldSortedHitQueue is removed + */ Entry (String field, int type, Parser parser) { this.field = StringHelper.intern(field); this.type = type; @@ -157,18 +290,20 @@ class FieldCacheImpl implements ExtendedFieldCache { // inherit javadocs public byte[] getBytes(IndexReader reader, String field, ByteParser parser) throws IOException { - return (byte[]) bytesCache.get(reader, new Entry(field, parser)); + return (byte[]) ((Cache)caches.get(Byte.TYPE)).get(reader, new Entry(field, parser)); } - Cache bytesCache = new Cache() { - - protected Object createValue(IndexReader reader, Object entryKey) + static final class ByteCache extends Cache { + ByteCache(FieldCache wrapper) { + super(wrapper); + } + protected Object createValue(IndexReader reader, Entry entryKey) throws IOException { Entry entry = (Entry) entryKey; String field = entry.field; ByteParser parser = (ByteParser) entry.custom; if (parser == null) { - return getBytes(reader, field, FieldCache.DEFAULT_BYTE_PARSER); + return wrapper.getBytes(reader, field, FieldCache.DEFAULT_BYTE_PARSER); } final byte[] retArray = new byte[reader.maxDoc()]; TermDocs termDocs = reader.termDocs(); @@ -200,18 +335,21 @@ class FieldCacheImpl implements ExtendedFieldCache { // inherit javadocs public short[] getShorts(IndexReader reader, String field, ShortParser parser) throws IOException { - return (short[]) shortsCache.get(reader, new Entry(field, parser)); + return (short[]) ((Cache)caches.get(Short.TYPE)).get(reader, new Entry(field, parser)); } - Cache shortsCache = new Cache() { + static final class ShortCache extends Cache { + ShortCache(FieldCache wrapper) { + super(wrapper); + } - protected Object createValue(IndexReader reader, Object entryKey) + protected Object createValue(IndexReader reader, Entry entryKey) throws IOException { Entry entry = (Entry) entryKey; String field = entry.field; ShortParser parser = (ShortParser) entry.custom; if (parser == null) { - return getShorts(reader, field, FieldCache.DEFAULT_SHORT_PARSER); + return wrapper.getShorts(reader, field, FieldCache.DEFAULT_SHORT_PARSER); } final short[] retArray = new short[reader.maxDoc()]; TermDocs termDocs = reader.termDocs(); @@ -243,21 +381,24 @@ class FieldCacheImpl implements ExtendedFieldCache { // inherit javadocs public int[] getInts(IndexReader reader, String field, IntParser parser) throws IOException { - return (int[]) intsCache.get(reader, new Entry(field, parser)); + return (int[]) ((Cache)caches.get(Integer.TYPE)).get(reader, new Entry(field, parser)); } - Cache intsCache = new Cache() { + static final class IntCache extends Cache { + IntCache(FieldCache wrapper) { + super(wrapper); + } - protected Object createValue(IndexReader reader, Object entryKey) + protected Object createValue(IndexReader reader, Entry entryKey) throws IOException { Entry entry = (Entry) entryKey; String field = entry.field; IntParser parser = (IntParser) entry.custom; if (parser == null) { try { - return getInts(reader, field, DEFAULT_INT_PARSER); + return wrapper.getInts(reader, field, DEFAULT_INT_PARSER); } catch (NumberFormatException ne) { - return getInts(reader, field, NUMERIC_UTILS_INT_PARSER); + return wrapper.getInts(reader, field, NUMERIC_UTILS_INT_PARSER); } } int[] retArray = null; @@ -295,24 +436,28 @@ class FieldCacheImpl implements ExtendedFieldCache { // inherit javadocs public float[] getFloats(IndexReader reader, String field, FloatParser parser) - throws IOException { - return (float[]) 
floatsCache.get(reader, new Entry(field, parser)); + throws IOException { + + return (float[]) ((Cache)caches.get(Float.TYPE)).get(reader, new Entry(field, parser)); } - Cache floatsCache = new Cache() { + static final class FloatCache extends Cache { + FloatCache(FieldCache wrapper) { + super(wrapper); + } - protected Object createValue(IndexReader reader, Object entryKey) + protected Object createValue(IndexReader reader, Entry entryKey) throws IOException { Entry entry = (Entry) entryKey; String field = entry.field; FloatParser parser = (FloatParser) entry.custom; if (parser == null) { try { - return getFloats(reader, field, DEFAULT_FLOAT_PARSER); + return wrapper.getFloats(reader, field, DEFAULT_FLOAT_PARSER); } catch (NumberFormatException ne) { - return getFloats(reader, field, NUMERIC_UTILS_FLOAT_PARSER); + return wrapper.getFloats(reader, field, NUMERIC_UTILS_FLOAT_PARSER); } - } + } float[] retArray = null; TermDocs termDocs = reader.termDocs(); TermEnum termEnum = reader.terms (new Term (field)); @@ -347,27 +492,30 @@ class FieldCacheImpl implements ExtendedFieldCache { // inherit javadocs public long[] getLongs(IndexReader reader, String field, FieldCache.LongParser parser) throws IOException { - return (long[]) longsCache.get(reader, new Entry(field, parser)); + return (long[]) ((Cache)caches.get(Long.TYPE)).get(reader, new Entry(field, parser)); } /** @deprecated Will be removed in 3.0, this is for binary compatibility only */ public long[] getLongs(IndexReader reader, String field, ExtendedFieldCache.LongParser parser) throws IOException { - return (long[]) longsCache.get(reader, new Entry(field, parser)); + return (long[]) ((Cache)caches.get(Long.TYPE)).get(reader, new Entry(field, parser)); } - Cache longsCache = new Cache() { + static final class LongCache extends Cache { + LongCache(FieldCache wrapper) { + super(wrapper); + } - protected Object createValue(IndexReader reader, Object entryKey) + protected Object createValue(IndexReader reader, Entry entryKey) throws IOException { Entry entry = (Entry) entryKey; String field = entry.field; FieldCache.LongParser parser = (FieldCache.LongParser) entry.custom; if (parser == null) { try { - return getLongs(reader, field, DEFAULT_LONG_PARSER); + return wrapper.getLongs(reader, field, DEFAULT_LONG_PARSER); } catch (NumberFormatException ne) { - return getLongs(reader, field, NUMERIC_UTILS_LONG_PARSER); + return wrapper.getLongs(reader, field, NUMERIC_UTILS_LONG_PARSER); } } long[] retArray = null; @@ -405,27 +553,30 @@ class FieldCacheImpl implements ExtendedFieldCache { // inherit javadocs public double[] getDoubles(IndexReader reader, String field, FieldCache.DoubleParser parser) throws IOException { - return (double[]) doublesCache.get(reader, new Entry(field, parser)); + return (double[]) ((Cache)caches.get(Double.TYPE)).get(reader, new Entry(field, parser)); } /** @deprecated Will be removed in 3.0, this is for binary compatibility only */ public double[] getDoubles(IndexReader reader, String field, ExtendedFieldCache.DoubleParser parser) throws IOException { - return (double[]) doublesCache.get(reader, new Entry(field, parser)); + return (double[]) ((Cache)caches.get(Double.TYPE)).get(reader, new Entry(field, parser)); } - Cache doublesCache = new Cache() { + static final class DoubleCache extends Cache { + DoubleCache(FieldCache wrapper) { + super(wrapper); + } - protected Object createValue(IndexReader reader, Object entryKey) + protected Object createValue(IndexReader reader, Entry entryKey) throws IOException { Entry 
entry = (Entry) entryKey; String field = entry.field; FieldCache.DoubleParser parser = (FieldCache.DoubleParser) entry.custom; if (parser == null) { try { - return getDoubles(reader, field, DEFAULT_DOUBLE_PARSER); + return wrapper.getDoubles(reader, field, DEFAULT_DOUBLE_PARSER); } catch (NumberFormatException ne) { - return getDoubles(reader, field, NUMERIC_UTILS_DOUBLE_PARSER); + return wrapper.getDoubles(reader, field, NUMERIC_UTILS_DOUBLE_PARSER); } } double[] retArray = null; @@ -457,14 +608,17 @@ class FieldCacheImpl implements ExtendedFieldCache { // inherit javadocs public String[] getStrings(IndexReader reader, String field) throws IOException { - return (String[]) stringsCache.get(reader, field); + return (String[]) ((Cache)caches.get(String.class)).get(reader, new Entry(field, (Parser)null)); } - Cache stringsCache = new Cache() { + static final class StringCache extends Cache { + StringCache(FieldCache wrapper) { + super(wrapper); + } - protected Object createValue(IndexReader reader, Object fieldKey) + protected Object createValue(IndexReader reader, Entry entryKey) throws IOException { - String field = StringHelper.intern((String) fieldKey); + String field = StringHelper.intern((String) entryKey.field); final String[] retArray = new String[reader.maxDoc()]; TermDocs termDocs = reader.termDocs(); TermEnum termEnum = reader.terms (new Term (field)); @@ -489,14 +643,17 @@ class FieldCacheImpl implements ExtendedFieldCache { // inherit javadocs public StringIndex getStringIndex(IndexReader reader, String field) throws IOException { - return (StringIndex) stringsIndexCache.get(reader, field); + return (StringIndex) ((Cache)caches.get(StringIndex.class)).get(reader, new Entry(field, (Parser)null)); } - Cache stringsIndexCache = new Cache() { + static final class StringIndexCache extends Cache { + StringIndexCache(FieldCache wrapper) { + super(wrapper); + } - protected Object createValue(IndexReader reader, Object fieldKey) + protected Object createValue(IndexReader reader, Entry entryKey) throws IOException { - String field = StringHelper.intern((String) fieldKey); + String field = StringHelper.intern((String) entryKey.field); final int[] retArray = new int[reader.maxDoc()]; String[] mterms = new String[reader.maxDoc()+1]; TermDocs termDocs = reader.termDocs(); @@ -563,7 +720,7 @@ class FieldCacheImpl implements ExtendedFieldCache { // inherit javadocs public Object getAuto(IndexReader reader, String field) throws IOException { - return autoCache.get(reader, field); + return ((Cache)caches.get(Object.class)).get(reader, new Entry(field, (Parser)null)); } /** @@ -571,11 +728,14 @@ class FieldCacheImpl implements ExtendedFieldCache { * Especially, guessing does not work with the new * {@link NumericField} type. 
*/ - Cache autoCache = new Cache() { + static final class AutoCache extends Cache { + AutoCache(FieldCache wrapper) { + super(wrapper); + } - protected Object createValue(IndexReader reader, Object fieldKey) + protected Object createValue(IndexReader reader, Entry entryKey) throws IOException { - String field = StringHelper.intern((String) fieldKey); + String field = StringHelper.intern((String) entryKey.field); TermEnum enumerator = reader.terms (new Term (field)); try { Term term = enumerator.term(); @@ -588,17 +748,17 @@ class FieldCacheImpl implements ExtendedFieldCache { try { Integer.parseInt (termtext); - ret = getInts (reader, field); + ret = wrapper.getInts (reader, field); } catch (NumberFormatException nfe1) { try { Long.parseLong(termtext); - ret = getLongs (reader, field); + ret = wrapper.getLongs (reader, field); } catch (NumberFormatException nfe2) { try { Float.parseFloat (termtext); - ret = getFloats (reader, field); + ret = wrapper.getFloats (reader, field); } catch (NumberFormatException nfe3) { - ret = getStringIndex (reader, field); + ret = wrapper.getStringIndex (reader, field); } } } @@ -615,13 +775,16 @@ class FieldCacheImpl implements ExtendedFieldCache { /** @deprecated */ public Comparable[] getCustom(IndexReader reader, String field, SortComparator comparator) throws IOException { - return (Comparable[]) customCache.get(reader, new Entry(field, comparator)); + return (Comparable[]) ((Cache)caches.get(Comparable.class)).get(reader, new Entry(field, comparator)); } /** @deprecated */ - Cache customCache = new Cache() { + static final class CustomCache extends Cache { + CustomCache(FieldCache wrapper) { + super(wrapper); + } - protected Object createValue(IndexReader reader, Object entryKey) + protected Object createValue(IndexReader reader, Entry entryKey) throws IOException { Entry entry = (Entry) entryKey; String field = entry.field; diff --git a/src/java/org/apache/lucene/search/FieldSortedHitQueue.java b/src/java/org/apache/lucene/search/FieldSortedHitQueue.java index a9c479edfe0..38cd2448602 100644 --- a/src/java/org/apache/lucene/search/FieldSortedHitQueue.java +++ b/src/java/org/apache/lucene/search/FieldSortedHitQueue.java @@ -180,7 +180,7 @@ extends PriorityQueue { * caches comparators instead of term values. */ static final FieldCacheImpl.Cache Comparators = new FieldCacheImpl.Cache() { - protected Object createValue(IndexReader reader, Object entryKey) + protected Object createValue(IndexReader reader, FieldCacheImpl.Entry entryKey) throws IOException { FieldCacheImpl.Entry entry = (FieldCacheImpl.Entry) entryKey; String fieldname = entry.field; diff --git a/src/java/org/apache/lucene/util/AverageGuessMemoryModel.java b/src/java/org/apache/lucene/util/AverageGuessMemoryModel.java new file mode 100644 index 00000000000..f10d7daace7 --- /dev/null +++ b/src/java/org/apache/lucene/util/AverageGuessMemoryModel.java @@ -0,0 +1,74 @@ +package org.apache.lucene.util; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.IdentityHashMap; +import java.util.Map; + +/** + * An average, best guess, MemoryModel that should work okay on most systems. + * + */ +public class AverageGuessMemoryModel extends MemoryModel { + // best guess primitive sizes + private final Map sizes = new IdentityHashMap() { + { + put(boolean.class, new Integer(1)); + put(byte.class, new Integer(1)); + put(char.class, new Integer(2)); + put(short.class, new Integer(2)); + put(int.class, new Integer(4)); + put(float.class, new Integer(4)); + put(double.class, new Integer(8)); + put(long.class, new Integer(8)); + } + }; + + /* + * (non-Javadoc) + * + * @see org.apache.lucene.util.MemoryModel#getArraySize() + */ + public int getArraySize() { + return 16; + } + + /* + * (non-Javadoc) + * + * @see org.apache.lucene.util.MemoryModel#getClassSize() + */ + public int getClassSize() { + return 8; + } + + /* (non-Javadoc) + * @see org.apache.lucene.util.MemoryModel#getPrimitiveSize(java.lang.Class) + */ + public int getPrimitiveSize(Class clazz) { + return ((Integer) sizes.get(clazz)).intValue(); + } + + /* (non-Javadoc) + * @see org.apache.lucene.util.MemoryModel#getReferenceSize() + */ + public int getReferenceSize() { + return 4; + } + +} diff --git a/src/java/org/apache/lucene/util/FieldCacheSanityChecker.java b/src/java/org/apache/lucene/util/FieldCacheSanityChecker.java new file mode 100644 index 00000000000..0ba1f7cdec1 --- /dev/null +++ b/src/java/org/apache/lucene/util/FieldCacheSanityChecker.java @@ -0,0 +1,436 @@ +package org.apache.lucene.util; +/** + * Copyright 2009 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.FieldCache; +import org.apache.lucene.search.FieldCache.CacheEntry; + +/** + * Provides methods for sanity checking that entries in the FieldCache + * are not wasteful or inconsistent. + *

+ *

+ * Lucene 2.9 introduced numerous enhancements into how the FieldCache + * is used by the low levels of Lucene searching (for Sorting and + * ValueSourceQueries) to improve both the speed of sorting and the + * reopening of IndexReaders. But these changes have shifted the + * usage of FieldCache from "top level" IndexReaders (frequently a + * MultiReader or DirectoryReader) down to the leaf level SegmentReaders. + * As a result, existing applications that directly access the FieldCache + * may find RAM usage increase significantly when upgrading to 2.9 or + * later. This class provides an API for these applications (or their + * unit tests) to check at run time if the FieldCache contains "insane" + * usages of the FieldCache. + *
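+ * A typical run-time check looks something like this (a sketch; how an + * application reports problems is up to it): + * <pre> + *   Insanity[] insanity = + *     FieldCacheSanityChecker.checkSanity(FieldCache.DEFAULT); + *   for (int i = 0; i &lt; insanity.length; i++) { + *     System.err.println(insanity[i].toString()); // type, msg, and entries + *   } + * </pre>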

+ *

+ * EXPERIMENTAL API: This API is considered extremely advanced and + * experimental. It may be removed or altered without warning in future + * releases of Lucene. + *

+ * @see FieldCache + * @see FieldCacheSanityChecker.Insanity + * @see FieldCacheSanityChecker.InsanityType + */ +public final class FieldCacheSanityChecker { + + private RamUsageEstimator ramCalc = null; + public FieldCacheSanityChecker() { + /* NOOP */ + } + /** + * If set, will be used to estimate size for all CacheEntry objects + * dealt with. + */ + public void setRamUsageEstimator(RamUsageEstimator r) { + ramCalc = r; + } + + + /** + * Quick and dirty convenience method + * @see #check + */ + public static Insanity[] checkSanity(FieldCache cache) { + return checkSanity(cache.getCacheEntries()); + } + + /** + * Quick and dirty convenience method that instantiates an instance with + * "good defaults" and uses it to test the CacheEntry[] + * @see #check + */ + public static Insanity[] checkSanity(CacheEntry[] cacheEntries) { + FieldCacheSanityChecker sanityChecker = new FieldCacheSanityChecker(); + // doesn't check for interned + sanityChecker.setRamUsageEstimator(new RamUsageEstimator(false)); + return sanityChecker.check(cacheEntries); + } + + + /** + * Tests a CacheEntry[] for indication of "insane" cache usage. + *

+ * NOTE: FieldCache CreationPlaceholder objects are ignored. + * (:TODO: is this a bad idea? are we masking a real problem?) + *
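+ * Callers who want size estimates attached to each entry can configure + * an instance directly instead of using the static convenience methods + * (sketch): + * <pre> + *   FieldCacheSanityChecker checker = new FieldCacheSanityChecker(); + *   checker.setRamUsageEstimator(new RamUsageEstimator(false)); + *   Insanity[] problems = checker.check(FieldCache.DEFAULT.getCacheEntries()); + * </pre>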

+ */ + public Insanity[] check(CacheEntry[] cacheEntries) { + if (null == cacheEntries || 0 == cacheEntries.length) + return new Insanity[0]; + + if (null != ramCalc) { + for (int i = 0; i < cacheEntries.length; i++) { + cacheEntries[i].estimateSize(ramCalc); + } + } + + // the indirect mapping lets MapOfSets dedup identical valIds for us + // + // maps the (valId) identityHashCode of cache values to + // sets of CacheEntry instances + final MapOfSets valIdToItems = new MapOfSets(new HashMap(17)); + // maps ReaderField keys to Sets of ValueIds + final MapOfSets readerFieldToValIds = new MapOfSets(new HashMap(17)); + // + + // any keys that we know result in more than one valId + final Set valMismatchKeys = new HashSet(); + + // iterate over all the cacheEntries to get the mappings we'll need + for (int i = 0; i < cacheEntries.length; i++) { + final CacheEntry item = cacheEntries[i]; + final Object val = item.getValue(); + + if (val instanceof FieldCache.CreationPlaceholder) + continue; + + final ReaderField rf = new ReaderField(item.getReaderKey(), + item.getFieldName()); + + final Integer valId = new Integer(System.identityHashCode(val)); + + // indirect mapping, so the MapOfSets will dedup identical valIds for us + valIdToItems.put(valId, item); + if (1 < readerFieldToValIds.put(rf, valId)) { + valMismatchKeys.add(rf); + } + } + + final List insanity = new ArrayList(valMismatchKeys.size() * 3); + + insanity.addAll(checkValueMismatch(valIdToItems, + readerFieldToValIds, + valMismatchKeys)); + insanity.addAll(checkSubreaders(valIdToItems, + readerFieldToValIds)); + + return (Insanity[]) insanity.toArray(new Insanity[insanity.size()]); + } + + /** + * Internal helper method used by check that iterates over + * valMismatchKeys and generates a Collection of Insanity + * instances accordingly. The MapOfSets are used to populate + * the Insanity objects. + * @see InsanityType#VALUEMISMATCH + */ + private Collection checkValueMismatch(MapOfSets valIdToItems, + MapOfSets readerFieldToValIds, + Set valMismatchKeys) { + + final List insanity = new ArrayList(valMismatchKeys.size() * 3); + + if (! valMismatchKeys.isEmpty() ) { + // we have multiple values for some ReaderFields + + final Map rfMap = readerFieldToValIds.getMap(); + final Map valMap = valIdToItems.getMap(); + final Iterator mismatchIter = valMismatchKeys.iterator(); + while (mismatchIter.hasNext()) { + final ReaderField rf = (ReaderField)mismatchIter.next(); + final List badEntries = new ArrayList(valMismatchKeys.size() * 2); + final Iterator valIter = ((Set)rfMap.get(rf)).iterator(); + while (valIter.hasNext()) { + Iterator entriesIter = ((Set)valMap.get(valIter.next())).iterator(); + while (entriesIter.hasNext()) { + badEntries.add(entriesIter.next()); + } + } + + CacheEntry[] badness = new CacheEntry[badEntries.size()]; + badness = (CacheEntry[]) badEntries.toArray(badness); + + insanity.add(new Insanity(InsanityType.VALUEMISMATCH, + "Multiple distinct value objects for " + + rf.toString(), badness)); + } + } + return insanity; + } + + /** + * Internal helper method used by check that iterates over + * the keys of readerFieldToValIds and generates a Collection + * of Insanity instances whenever two (or more) ReaderField instances are + * found that have an ancestry relationship. 
+ * + * @see InsanityType#SUBREADER + */ + private Collection checkSubreaders(MapOfSets valIdToItems, + MapOfSets readerFieldToValIds) { + + final List insanity = new ArrayList(23); + + Map badChildren = new HashMap(17); + MapOfSets badKids = new MapOfSets(badChildren); // wrapper + + Map viToItemSets = valIdToItems.getMap(); + Map rfToValIdSets = readerFieldToValIds.getMap(); + + Set seen = new HashSet(17); + + Set readerFields = rfToValIdSets.keySet(); + Iterator rfIter = readerFields.iterator(); + while (rfIter.hasNext()) { + ReaderField rf = (ReaderField) rfIter.next(); + + if (seen.contains(rf)) continue; + + List kids = getAllDecendentReaderKeys(rf.readerKey); + for (int i = 0; i < kids.size(); i++) { + ReaderField kid = new ReaderField(kids.get(i), rf.fieldName); + + if (badChildren.containsKey(kid)) { + // we've already process this kid as RF and found other problems + // track those problems as our own + badKids.put(rf, kid); + badKids.putAll(rf, (Collection)badChildren.get(kid)); + badChildren.remove(kid); + + } else if (rfToValIdSets.containsKey(kid)) { + // we have cache entries for the kid + badKids.put(rf, kid); + } + seen.add(kid); + } + seen.add(rf); + } + + // every mapping in badKids represents an Insanity + Iterator parentsIter = badChildren.keySet().iterator(); + while (parentsIter.hasNext()) { + ReaderField parent = (ReaderField) parentsIter.next(); + Set kids = (Set) badChildren.get(parent); + + List badEntries = new ArrayList(kids.size() * 2); + + // put parent entr(ies) in first + { + Iterator valIter =((Set)rfToValIdSets.get(parent)).iterator(); + while (valIter.hasNext()) { + badEntries.addAll((Set)viToItemSets.get(valIter.next())); + } + } + + // now the entries for the descendants + Iterator kidsIter = kids.iterator(); + while (kidsIter.hasNext()) { + ReaderField kid = (ReaderField) kidsIter.next(); + Iterator valIter =((Set)rfToValIdSets.get(kid)).iterator(); + while (valIter.hasNext()) { + badEntries.addAll((Set)viToItemSets.get(valIter.next())); + } + } + + CacheEntry[] badness = new CacheEntry[badEntries.size()]; + badness = (CacheEntry[]) badEntries.toArray(badness); + + insanity.add(new Insanity(InsanityType.SUBREADER, + "Found caches for decendents of " + + parent.toString(), + badness)); + } + + return insanity; + + } + + /** + * Checks if the seed is an IndexReader, and if so will walk + * the hierarchy of subReaders building up a list of the objects + * returned by obj.getFieldCacheKey() + */ + private List getAllDecendentReaderKeys(Object seed) { + List all = new ArrayList(17); // will grow as we iter + all.add(seed); + for (int i = 0; i < all.size(); i++) { + Object obj = all.get(i); + if (obj instanceof IndexReader) { + IndexReader[] subs = ((IndexReader)obj).getSequentialSubReaders(); + for (int j = 0; (null != subs) && (j < subs.length); j++) { + all.add(subs[j].getFieldCacheKey()); + } + } + + } + // need to skip the first, because it was the seed + return all.subList(1, all.size()); + } + + /** + * Simple pair object for using "readerKey + fieldName" a Map key + */ + private final static class ReaderField { + public final Object readerKey; + public final String fieldName; + public ReaderField(Object readerKey, String fieldName) { + this.readerKey = readerKey; + this.fieldName = fieldName; + } + public int hashCode() { + return System.identityHashCode(readerKey) * fieldName.hashCode(); + } + public boolean equals(Object that) { + if (! 
(that instanceof ReaderField)) return false; + + ReaderField other = (ReaderField) that; + return (this.readerKey == other.readerKey && + this.fieldName.equals(other.fieldName)); + } + public String toString() { + return readerKey.toString() + "+" + fieldName; + } + } + + /** + * Simple container for a collection of related CacheEntry objects that + * in conjunction with each other represent some "insane" usage of the + * FieldCache. + */ + public final static class Insanity { + private final InsanityType type; + private final String msg; + private final CacheEntry[] entries; + public Insanity(InsanityType type, String msg, CacheEntry[] entries) { + if (null == type) { + throw new IllegalArgumentException + ("Insanity requires non-null InsanityType"); + } + if (null == entries || 0 == entries.length) { + throw new IllegalArgumentException + ("Insanity requires non-null/non-empty CacheEntry[]"); + } + this.type = type; + this.msg = msg; + this.entries = entries; + + } + /** + * Type of insane behavior this object represents + */ + public InsanityType getType() { return type; } + /** + * Description of the insane behavior + */ + public String getMsg() { return msg; } + /** + * CacheEntry objects which suggest a problem + */ + public CacheEntry[] getCacheEntries() { return entries; } + /** + * Multi-line representation of this Insanity object, starting with + * the Type and Msg, followed by each CacheEntry.toString() on its + * own line prefaced by a tab character + */ + public String toString() { + StringBuffer buf = new StringBuffer(); + buf.append(getType()).append(": "); + + String m = getMsg(); + if (null != m) buf.append(m); + + buf.append('\n'); + + CacheEntry[] ce = getCacheEntries(); + for (int i = 0; i < ce.length; i++) { + buf.append('\t').append(ce[i].toString()).append('\n'); + } + + return buf.toString(); + } + } + + /** + * An Enumeration of the different types of "insane" behavior that + * may be detected in a FieldCache. + * + * @see InsanityType#SUBREADER + * @see InsanityType#VALUEMISMATCH + * @see InsanityType#EXPECTED + */ + public final static class InsanityType { + private final String label; + private InsanityType(final String label) { + this.label = label; + } + public String toString() { return label; } + + /** + * Indicates an overlap in cache usage on a given field + * in sub/super readers. + */ + public final static InsanityType SUBREADER + = new InsanityType("SUBREADER"); + + /** + *

+ * Indicates entries have the same reader+fieldname but + * different cached values. This can happen if different datatypes + * or parsers are used -- and while it's not necessarily a bug, + * it's typically an indication of a possible problem. + *

+ *

+ * Only the reader, fieldname, and cached value are actually + * tested -- if two cache entries have different parsers or datatypes but + * the cached values are the same Object (== not just equal()) this method + * does not consider that a red flag. This allows for subtle variations + * in the way a Parser is specified (null vs DEFAULT_LONG_PARSER, etc...) + *
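+ * For example, populating the cache for one (hypothetical) field under + * two different value types yields two distinct cached values for the + * same reader+field, which this type flags (sketch; reader is any open + * IndexReader): + * <pre> + *   int[] asInts = FieldCache.DEFAULT.getInts(reader, "id"); + *   float[] asFloats = FieldCache.DEFAULT.getFloats(reader, "id"); + *   // same reader+field, two distinct value objects => VALUEMISMATCH + * </pre>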

+ */ + public final static InsanityType VALUEMISMATCH + = new InsanityType("VALUEMISMATCH"); + + /** + * Indicates an expected bit of "insanity". This may be useful for + * clients that wish to preserve/log information about insane usage + * but indicate that it was expected. + */ + public final static InsanityType EXPECTED + = new InsanityType("EXPECTED"); + } + + +} diff --git a/src/java/org/apache/lucene/util/MapOfSets.java b/src/java/org/apache/lucene/util/MapOfSets.java new file mode 100644 index 00000000000..eadb2feb149 --- /dev/null +++ b/src/java/org/apache/lucene/util/MapOfSets.java @@ -0,0 +1,81 @@ +package org.apache.lucene.util; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +import java.util.Set; + import java.util.Collection; +import java.util.HashSet; +import java.util.Map; + +/** + * Helper class for keeping Sets of Objects associated with keys. WARNING: THIS CLASS IS NOT THREAD SAFE + */ +public class MapOfSets { + + private final Map theMap; + + /** + * @param m the backing store for this object + */ + public MapOfSets(Map m) { + theMap = m; + } + + /** + * @return direct access to the map backing this object. + */ + public Map getMap() { + return theMap; + } + + /** + * Adds val to the Set associated with key in the Map. If key is not + * already in the map, a new Set will first be created. + * @return the size of the Set associated with key once val is added to it. + */ + public int put(Object key, Object val) { + final Set theSet; + if (theMap.containsKey(key)) { + theSet = (Set)theMap.get(key); + } else { + theSet = new HashSet(23); + theMap.put(key, theSet); + } + theSet.add(val); + return theSet.size(); + } + /** + * Adds multiple vals to the Set associated with key in the Map. + * If key is not + * already in the map, a new Set will first be created. + * @return the size of the Set associated with key once the vals are added to it. + */ + public int putAll(Object key, Collection vals) { + final Set theSet; + if (theMap.containsKey(key)) { + theSet = (Set)theMap.get(key); + } else { + theSet = new HashSet(23); + theMap.put(key, theSet); + } + theSet.addAll(vals); + return theSet.size(); + } + +} diff --git a/src/java/org/apache/lucene/util/MemoryModel.java b/src/java/org/apache/lucene/util/MemoryModel.java new file mode 100644 index 00000000000..ed422d2f81c --- /dev/null +++ b/src/java/org/apache/lucene/util/MemoryModel.java @@ -0,0 +1,48 @@ +package org.apache.lucene.util; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. 
The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +/** + * Returns primitive memory sizes for estimating RAM usage. + * + */ +public abstract class MemoryModel { + + /** + * @return size of array beyond contents + */ + public abstract int getArraySize(); + + /** + * @return Class size overhead + */ + public abstract int getClassSize(); + + /** + * @param clazz a primitive Class - boolean, byte, char, short, int, + * long, float, double + * @return the size in bytes of given primitive Class + */ + public abstract int getPrimitiveSize(Class clazz); + + /** + * @return size of reference + */ + public abstract int getReferenceSize(); + +} diff --git a/src/java/org/apache/lucene/util/RamUsageEstimator.java b/src/java/org/apache/lucene/util/RamUsageEstimator.java new file mode 100644 index 00000000000..bc84448a9a0 --- /dev/null +++ b/src/java/org/apache/lucene/util/RamUsageEstimator.java @@ -0,0 +1,197 @@ +package org.apache.lucene.util; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.lang.reflect.*; +import java.text.DecimalFormat; +import java.util.*; + +/** + * Estimates the size of a given Object using a given MemoryModel for primitive + * size information. + * + * Resource Usage: + * + * Internally uses a Map to temporarily hold a reference to every + * object seen. + * + * If checkInterned, all Strings checked will be interned, but those + * that were not already interned will be released for GC when the + * estimate is complete. + */ +public final class RamUsageEstimator { + private MemoryModel memoryModel; + + private final Map seen; + + private int refSize; + private int arraySize; + private int classSize; + + private boolean checkInterned; + + /** + * Constructs this object with an AverageGuessMemoryModel and + * checkInterned = true. + */ + public RamUsageEstimator() { + this(new AverageGuessMemoryModel()); + } + + /** + * @param checkInterned check if Strings are interned and don't add to size + * if they are. Defaults to true but if you know the objects you are checking + * won't likely contain many interned Strings, it will be faster to turn off + * intern checking. 
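+ * Example (sketch, using the AverageGuessMemoryModel defaults): + * <pre> + *   RamUsageEstimator estimator = new RamUsageEstimator(false); + *   long bytes = estimator.estimateRamUsage(new int[1024]); + *   // = 16 (array overhead) + 1024 * 4 = 4112 bytes + * </pre> + * FieldCacheSanityChecker.checkSanity constructs its estimator the same + * way, on the assumption that FieldCache values rarely hold many + * interned Strings. 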
+ */ + public RamUsageEstimator(boolean checkInterned) { + this(new AverageGuessMemoryModel(), checkInterned); + } + + /** + * @param memoryModel MemoryModel to use for primitive object sizes. + */ + public RamUsageEstimator(MemoryModel memoryModel) { + this(memoryModel, true); + } + + /** + * @param memoryModel MemoryModel to use for primitive object sizes. + * @param checkInterned check if Strings are interned and don't add to size + * if they are. Defaults to true but if you know the objects you are checking + * won't likely contain many interned Strings, it will be faster to turn off + * intern checking. + */ + public RamUsageEstimator(MemoryModel memoryModel, boolean checkInterned) { + this.memoryModel = memoryModel; + this.checkInterned = checkInterned; + // Use Map rather than Set so that we can use an IdentityHashMap - not + // seeing an IdentityHashSet + seen = new IdentityHashMap(64); + this.refSize = memoryModel.getReferenceSize(); + this.arraySize = memoryModel.getArraySize(); + this.classSize = memoryModel.getClassSize(); + } + + public long estimateRamUsage(Object obj) { + long size = size(obj); + seen.clear(); + return size; + } + + private long size(Object obj) { + if (obj == null) { + return 0; + } + // interned not part of this object + if (checkInterned && obj instanceof String + && obj == ((String) obj).intern()) { // interned string will be eligible + // for GC on + // estimateRamUsage(Object) return + return 0; + } + + // skip if we have seen before + if (seen.containsKey(obj)) { + return 0; + } + + // add to seen + seen.put(obj, null); + + Class clazz = obj.getClass(); + if (clazz.isArray()) { + return sizeOfArray(obj); + } + + long size = 0; + + // walk type hierarchy + while (clazz != null) { + Field[] fields = clazz.getDeclaredFields(); + for (int i = 0; i < fields.length; i++) { + if (Modifier.isStatic(fields[i].getModifiers())) { + continue; + } + + if (fields[i].getType().isPrimitive()) { + size += memoryModel.getPrimitiveSize(fields[i].getType()); + } else { + size += refSize; + fields[i].setAccessible(true); + try { + Object value = fields[i].get(obj); + if (value != null) { + size += size(value); + } + } catch (IllegalAccessException ex) { + // ignore for now? + } + } + + } + clazz = clazz.getSuperclass(); + } + size += classSize; + return size; + } + + private long sizeOfArray(Object obj) { + int len = Array.getLength(obj); + if (len == 0) { + return 0; + } + long size = arraySize; + Class arrayElementClazz = obj.getClass().getComponentType(); + if (arrayElementClazz.isPrimitive()) { + size += len * memoryModel.getPrimitiveSize(arrayElementClazz); + } else { + for (int i = 0; i < len; i++) { + size += refSize + size(Array.get(obj, i)); + } + } + + return size; + } + + private static final long ONE_KB = 1024; + private static final long ONE_MB = ONE_KB * ONE_KB; + private static final long ONE_GB = ONE_KB * ONE_MB; + + /** + * Return good default units based on byte size. 
+ */ + public static String humanReadableUnits(long bytes, DecimalFormat df) { + String newSizeAndUnits; + + if (bytes / ONE_GB > 0) { + newSizeAndUnits = String.valueOf(df.format((float) bytes / ONE_GB)) + + " GB"; + } else if (bytes / ONE_MB > 0) { + newSizeAndUnits = String.valueOf(df.format((float) bytes / ONE_MB)) + + " MB"; + } else if (bytes / ONE_KB > 0) { + newSizeAndUnits = String.valueOf(df.format((float) bytes / ONE_KB)) + + " KB"; + } else { + newSizeAndUnits = String.valueOf(bytes) + " bytes"; + } + + return newSizeAndUnits; + } +} diff --git a/src/test/org/apache/lucene/search/QueryUtils.java b/src/test/org/apache/lucene/search/QueryUtils.java index cd06d41b920..bdcf36dd222 100644 --- a/src/test/org/apache/lucene/search/QueryUtils.java +++ b/src/test/org/apache/lucene/search/QueryUtils.java @@ -146,74 +146,89 @@ public class QueryUtils { {skip_op, skip_op, skip_op, next_op, next_op}, }; for (int k = 0; k < orders.length; k++) { - final int order[] = orders[k]; - //System.out.print("Order:");for (int i = 0; i < order.length; i++) System.out.print(order[i]==skip_op ? " skip()":" next()"); System.out.println(); - final int opidx[] = {0}; + IndexReader[] readers = s.getIndexReader().getSequentialSubReaders(); - final Weight w = q.weight(s); - final Scorer scorer = w.scorer(s.getIndexReader(), true, false); - if (scorer == null) { - continue; - } + for (int x = 0; x < readers.length; x++) { + IndexReader reader = readers[x]; - // FUTURE: ensure scorer.doc()==-1 + final int order[] = orders[k]; + // System.out.print("Order:");for (int i = 0; i < order.length; i++) + // System.out.print(order[i]==skip_op ? " skip()":" next()"); + // System.out.println(); + final int opidx[] = { 0 }; - final int[] sdoc = new int[] {-1}; - final float maxDiff = 1e-5f; - s.search(q,new Collector() { - private int base = 0; - private Scorer sc; - public void setScorer(Scorer scorer) throws IOException { - this.sc = scorer; + final Weight w = q.weight(s); + final Scorer scorer = w.scorer(reader, true, false); + if (scorer == null) { + continue; } - public void collect(int doc) throws IOException { - doc = doc + base; - float score = sc.score(); - try { - int op = order[(opidx[0]++)%order.length]; - //System.out.println(op==skip_op ? "skip("+(sdoc[0]+1)+")":"next()"); - boolean more = op == skip_op ? scorer.advance(sdoc[0] + 1) != DocIdSetIterator.NO_MORE_DOCS - : scorer.nextDoc() != DocIdSetIterator.NO_MORE_DOCS; - sdoc[0] = scorer.docID(); - float scorerScore = scorer.score(); - float scorerScore2 = scorer.score(); - float scoreDiff = Math.abs(score-scorerScore); - float scorerDiff = Math.abs(scorerScore2-scorerScore); - if (!more || doc != sdoc[0] || scoreDiff>maxDiff || scorerDiff>maxDiff) { - StringBuffer sbord = new StringBuffer(); - for (int i = 0; i < order.length; i++) - sbord.append(order[i]==skip_op ? " skip()":" next()"); - throw new RuntimeException("ERROR matching docs:" - +"\n\t"+(doc!=sdoc[0]?"--> ":"")+"doc="+sdoc[0] - +"\n\t"+(!more?"--> ":"")+"tscorer.more=" + more - +"\n\t"+(scoreDiff>maxDiff?"--> ":"")+"scorerScore="+scorerScore+" scoreDiff="+scoreDiff + " maxDiff="+maxDiff - +"\n\t"+(scorerDiff>maxDiff?"--> ":"")+"scorerScore2="+scorerScore2+" scorerDiff="+scorerDiff - +"\n\thitCollector.doc=" + doc + " score="+score - +"\n\t Scorer=" + scorer - +"\n\t Query=" + q + " "+q.getClass().getName() - +"\n\t Searcher=" + s - +"\n\t Order=" + sbord - +"\n\t Op=" + (op==skip_op ? 
" skip()":" next()") - ); - } - } catch (IOException e) { - throw new RuntimeException(e); + + // FUTURE: ensure scorer.doc()==-1 + + final int[] sdoc = new int[] { -1 }; + final float maxDiff = 1e-5f; + s.search(q, new Collector() { + private int base = 0; + private Scorer sc; + + public void setScorer(Scorer scorer) throws IOException { + this.sc = scorer; } - } - public void setNextReader(IndexReader reader, int docBase) { - base = docBase; - } - public boolean acceptsDocsOutOfOrder() { - return true; - } - }); - - // make sure next call to scorer is false. - int op = order[(opidx[0]++)%order.length]; - //System.out.println(op==skip_op ? "last: skip()":"last: next()"); - boolean more = (op == skip_op ? scorer.advance(sdoc[0] + 1) : scorer - .nextDoc()) != DocIdSetIterator.NO_MORE_DOCS; - Assert.assertFalse(more); + + public void collect(int doc) throws IOException { + doc = doc + base; + float score = sc.score(); + try { + int op = order[(opidx[0]++) % order.length]; + // System.out.println(op==skip_op ? + // "skip("+(sdoc[0]+1)+")":"next()"); + boolean more = op == skip_op ? scorer.advance(sdoc[0] + 1) != DocIdSetIterator.NO_MORE_DOCS + : scorer.nextDoc() != DocIdSetIterator.NO_MORE_DOCS; + sdoc[0] = scorer.docID(); + float scorerScore = scorer.score(); + float scorerScore2 = scorer.score(); + float scoreDiff = Math.abs(score - scorerScore); + float scorerDiff = Math.abs(scorerScore2 - scorerScore); + if (!more || doc != sdoc[0] || scoreDiff > maxDiff + || scorerDiff > maxDiff) { + StringBuffer sbord = new StringBuffer(); + for (int i = 0; i < order.length; i++) + sbord.append(order[i] == skip_op ? " skip()" : " next()"); + throw new RuntimeException("ERROR matching docs:" + "\n\t" + + (doc != sdoc[0] ? "--> " : "") + "doc=" + sdoc[0] + + "\n\t" + (!more ? "--> " : "") + "tscorer.more=" + more + + "\n\t" + (scoreDiff > maxDiff ? "--> " : "") + + "scorerScore=" + scorerScore + " scoreDiff=" + scoreDiff + + " maxDiff=" + maxDiff + "\n\t" + + (scorerDiff > maxDiff ? "--> " : "") + "scorerScore2=" + + scorerScore2 + " scorerDiff=" + scorerDiff + + "\n\thitCollector.doc=" + doc + " score=" + score + + "\n\t Scorer=" + scorer + "\n\t Query=" + q + " " + + q.getClass().getName() + "\n\t Searcher=" + s + + "\n\t Order=" + sbord + "\n\t Op=" + + (op == skip_op ? " skip()" : " next()")); + } + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + public void setNextReader(IndexReader reader, int docBase) { + base = docBase; + } + + public boolean acceptsDocsOutOfOrder() { + return true; + } + }); + + // make sure next call to scorer is false. + int op = order[(opidx[0]++) % order.length]; + // System.out.println(op==skip_op ? "last: skip()":"last: next()"); + boolean more = (op == skip_op ? 
scorer.advance(sdoc[0] + 1) : scorer
+            .nextDoc()) != DocIdSetIterator.NO_MORE_DOCS;
+        Assert.assertFalse(more);
+      }
     }
   }
 
@@ -223,19 +238,19 @@ public class QueryUtils {
     final float maxDiff = 1e-5f;
     final int lastDoc[] = {-1};
     s.search(q,new Collector() {
-      private int base = 0;
       private Scorer scorer;
+      private IndexReader reader;
       public void setScorer(Scorer scorer) throws IOException {
         this.scorer = scorer;
       }
       public void collect(int doc) throws IOException {
         //System.out.println("doc="+doc);
-        doc = doc + base;
         float score = scorer.score();
         try {
+          
           for (int i=lastDoc[0]+1; i<=doc; i++) {
             Weight w = q.weight(s);
-            Scorer scorer = w.scorer(s.getIndexReader(), true, false);
+            Scorer scorer = w.scorer(reader, true, false);
             Assert.assertTrue("query collected "+doc+" but skipTo("+i+") says no more docs!",scorer.advance(i) != DocIdSetIterator.NO_MORE_DOCS);
             Assert.assertEquals("query collected "+doc+" but skipTo("+i+") got to "+scorer.docID(),doc,scorer.docID());
             float skipToScore = scorer.score();
@@ -248,18 +263,27 @@ public class QueryUtils {
         }
       }
       public void setNextReader(IndexReader reader, int docBase) {
-        base = docBase;
+        this.reader = reader;
+        lastDoc[0] = -1;
       }
       public boolean acceptsDocsOutOfOrder() {
         return false;
       }
     });
-    Weight w = q.weight(s);
-    Scorer scorer = w.scorer(s.getIndexReader(), true, false);
-    if (scorer != null) {
-      boolean more = scorer.advance(lastDoc[0] + 1) != DocIdSetIterator.NO_MORE_DOCS;
-      if (more)
-        Assert.assertFalse("query's last doc was "+lastDoc[0]+" but skipTo("+(lastDoc[0]+1)+") got to "+scorer.docID(),more);
+
+    IndexReader[] readers = s.getIndexReader().getSequentialSubReaders();
+    for(int i = 0; i < readers.length; i++) {
+      IndexReader reader = readers[i];
+      Weight w = q.weight(s);
+      Scorer scorer = w.scorer(reader, true, false);
+
+      if (scorer != null) {
+        boolean more = scorer.advance(lastDoc[0] + 1) != DocIdSetIterator.NO_MORE_DOCS;
+
+        if (more && lastDoc[0] != -1)
+          Assert.assertFalse("query's last doc was "+ lastDoc[0] +" but skipTo("+(lastDoc[0]+1)+") got to "+scorer.docID(),more);
+      }
     }
+
   }
 
diff --git a/src/test/org/apache/lucene/search/TestSort.java b/src/test/org/apache/lucene/search/TestSort.java
index 8707823de15..a873f88a07f 100644
--- a/src/test/org/apache/lucene/search/TestSort.java
+++ b/src/test/org/apache/lucene/search/TestSort.java
@@ -320,16 +320,26 @@ public class TestSort extends LuceneTestCase implements Serializable {
 
   }
 
-  // test sorts where the type of field is specified and a custom field parser is used, that
-  // uses a simple char encoding. The sorted string contains a character beginning from 'A' that
-  // is mapped to a numeric value using some "funny" algorithm to be different for each data type.
+  /**
+   * Test sorts where the type of field is specified and a custom field parser
+   * that uses a simple char encoding is used. The sorted string contains a
+   * character, beginning from 'A', that is mapped to a numeric value using a
+   * "funny" algorithm that differs for each data type.
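+   * For example, the IntParser below maps 'A' to 0, 'B' to 123456 and 'C' to
+   * 246912; the float, long, double, byte and short parsers apply analogous
+   * per-type mappings.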
+ */
   public void testCustomFieldParserSort() throws Exception {
+    // since this test explicitly uses different parsers on the same field name,
+    // we explicitly check/purge the FieldCache between each assertMatches call
+    FieldCache fc = FieldCache.DEFAULT;
+
+
     sort.setSort (new SortField[] { new SortField ("parser", new FieldCache.IntParser(){
       public final int parseInt(final String val) {
         return (val.charAt(0)-'A') * 123456;
       }
     }), SortField.FIELD_DOC });
     assertMatches (full, queryA, sort, "JIHGFEDCBA");
+    assertSaneFieldCaches(getName() + " IntParser");
+    fc.purgeAllCaches();
 
     sort.setSort (new SortField[] { new SortField ("parser", new FieldCache.FloatParser(){
       public final float parseFloat(final String val) {
@@ -337,6 +347,8 @@ public class TestSort extends LuceneTestCase implements Serializable {
       }
     }), SortField.FIELD_DOC });
     assertMatches (full, queryA, sort, "JIHGFEDCBA");
+    assertSaneFieldCaches(getName() + " FloatParser");
+    fc.purgeAllCaches();
 
     sort.setSort (new SortField[] { new SortField ("parser", new FieldCache.LongParser(){
       public final long parseLong(final String val) {
@@ -344,6 +356,8 @@ public class TestSort extends LuceneTestCase implements Serializable {
       }
     }), SortField.FIELD_DOC });
     assertMatches (full, queryA, sort, "JIHGFEDCBA");
+    assertSaneFieldCaches(getName() + " LongParser");
+    fc.purgeAllCaches();
 
     sort.setSort (new SortField[] { new SortField ("parser", new FieldCache.DoubleParser(){
       public final double parseDouble(final String val) {
@@ -351,6 +365,8 @@ public class TestSort extends LuceneTestCase implements Serializable {
       }
     }), SortField.FIELD_DOC });
     assertMatches (full, queryA, sort, "JIHGFEDCBA");
+    assertSaneFieldCaches(getName() + " DoubleParser");
+    fc.purgeAllCaches();
 
     sort.setSort (new SortField[] { new SortField ("parser", new FieldCache.ByteParser(){
       public final byte parseByte(final String val) {
@@ -358,6 +374,8 @@ public class TestSort extends LuceneTestCase implements Serializable {
       }
     }), SortField.FIELD_DOC });
     assertMatches (full, queryA, sort, "JIHGFEDCBA");
+    assertSaneFieldCaches(getName() + " ByteParser");
+    fc.purgeAllCaches();
 
     sort.setSort (new SortField[] { new SortField ("parser", new FieldCache.ShortParser(){
       public final short parseShort(final String val) {
@@ -365,6 +383,8 @@ public class TestSort extends LuceneTestCase implements Serializable {
       }
     }), SortField.FIELD_DOC });
     assertMatches (full, queryA, sort, "JIHGFEDCBA");
+    assertSaneFieldCaches(getName() + " ShortParser");
+    fc.purgeAllCaches();
   }
 
   // test sorts when there's nothing in the index
@@ -930,12 +950,6 @@ public class TestSort extends LuceneTestCase implements Serializable {
     sort.setSort("string", true);
     assertMatches(multi, queryA, sort, "CBEFGHIAJD");
 
-    sort.setSort(new SortField[] { new SortField ("string", Locale.US) });
-    assertMatches(multi, queryA, sort, "DJAIHGFEBC");
-
-    sort.setSort(new SortField[] { new SortField ("string", Locale.US, true) });
-    assertMatches(multi, queryA, sort, "CBEFGHIAJD");
-
     sort.setSort(new String[] {"int","float"});
     assertMatches(multi, queryA, sort, "IDHFGJEABC");
 
@@ -956,6 +970,25 @@ public class TestSort extends LuceneTestCase implements Serializable {
     sort.setSort("string", true);
     assertMatches(multi, queryF, sort, "IJZ");
+
+    // up to this point, all of the searches should have "sane"
+    // FieldCache behavior, and should have reused the cache in several cases
+    assertSaneFieldCaches(getName() + " various");
+    // next we'll check Locale-based sorting for 'string', so purge first
+    FieldCache.DEFAULT.purgeAllCaches();
+
+    sort.setSort(new SortField[] { new SortField ("string", Locale.US) });
+    assertMatches(multi, queryA, sort, "DJAIHGFEBC");
+
+    sort.setSort(new SortField[] { new SortField ("string", Locale.US, true) });
+    assertMatches(multi, queryA, sort, "CBEFGHIAJD");
+
+    sort.setSort(new SortField[] { new SortField ("string", Locale.UK) });
+    assertMatches(multi, queryA, sort, "DJAIHGFEBC");
+
+    assertSaneFieldCaches(getName() + " Locale.US + Locale.UK");
+    FieldCache.DEFAULT.purgeAllCaches();
+
   }
 
   // make sure the documents returned by the search match the expected list
diff --git a/src/test/org/apache/lucene/search/TestStressSort.java b/src/test/org/apache/lucene/search/TestStressSort.java
index f46cd33cbbc..a81e36cf10d 100644
--- a/src/test/org/apache/lucene/search/TestStressSort.java
+++ b/src/test/org/apache/lucene/search/TestStressSort.java
@@ -26,6 +26,9 @@ import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.util._TestUtil;
+import org.apache.lucene.util.FieldCacheSanityChecker;
+import org.apache.lucene.util.FieldCacheSanityChecker.Insanity;
+import org.apache.lucene.util.FieldCacheSanityChecker.InsanityType;
 
 import java.util.Random;
 import java.util.Arrays;
@@ -107,9 +110,13 @@ public class TestStressSort extends LuceneTestCase {
       doc.add(doubleField);
       doc2.add(doubleField);
 
+      // we use two different string fields so our FieldCache usage
+      // doesn't look suspicious to cache inspection
      final Field stringField = new Field("string", "", Field.Store.NO, Field.Index.NOT_ANALYZED);
      doc.add(stringField);
-      // doc2 doesn't have stringField, so we get nulls
+      final Field stringFieldIdx = new Field("stringIdx", "", Field.Store.NO, Field.Index.NOT_ANALYZED);
+      doc.add(stringFieldIdx);
+      // doc2 doesn't have stringField or stringFieldIdx, so we get nulls
 
      for(int i=0;i<NUM_DOCS;i++) {
diff --git a/src/test/org/apache/lucene/util/LuceneTestCase.java b/src/test/org/apache/lucene/util/LuceneTestCase.java
--- a/src/test/org/apache/lucene/util/LuceneTestCase.java
+++ b/src/test/org/apache/lucene/util/LuceneTestCase.java
-/** Base class for all Lucene unit tests.  Currently the
- * only added functionality over JUnit's TestCase is
- * asserting that no unhandled exceptions occurred in
- * threads launched by ConcurrentMergeScheduler.  If you
- * override either setUp() or
- * tearDown() in your unit test, make sure you
- * call super.setUp() and
- * super.tearDown().
- */
+import java.io.PrintStream;
+import java.util.Arrays;
+import java.util.Iterator;
+
+import junit.framework.TestCase;
+import org.apache.lucene.index.ConcurrentMergeScheduler;
+import org.apache.lucene.search.FieldCache;
+import org.apache.lucene.search.FieldCache.CacheEntry;
+import org.apache.lucene.util.FieldCacheSanityChecker.Insanity;
+
+/**
+ * Base class for all Lucene unit tests.
+ *

+ * Currently the
+ * only added functionality over JUnit's TestCase is
+ * asserting that no unhandled exceptions occurred in
+ * threads launched by ConcurrentMergeScheduler and asserting sane
+ * FieldCache usage at the moment of tearDown.
+ *
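+ * ("Sane" usage means FieldCacheSanityChecker reports no Insanity, e.g. no
+ * VALUEMISMATCH or SUBREADER entries, for FieldCache.DEFAULT.)
+ *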

+ *

+ * If you
+ * override either setUp() or
+ * tearDown() in your unit test, make sure you
+ * call super.setUp() and
+ * super.tearDown().
+ *
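+ * For example (illustrative):
+ *
+ *   protected void setUp() throws Exception {
+ *     super.setUp();
+ *     // test-specific setup
+ *   }
+ *
+ *   protected void tearDown() throws Exception {
+ *     // test-specific cleanup
+ *     super.tearDown();
+ *   }
+ *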

+ * @see #assertSaneFieldCaches
+ */
 public abstract class LuceneTestCase extends TestCase {
 
   public LuceneTestCase() {
@@ -46,14 +61,113 @@ public abstract class LuceneTestCase extends TestCase {
     ConcurrentMergeScheduler.setTestMode();
   }
 
+  /**
+   * Forcibly purges all cache entries from the FieldCache.
+   *

+   * This method will be called by tearDown to clean up FieldCache.DEFAULT.
+   * If a (poorly written) test has some expectation that the FieldCache
+   * will persist across test methods (i.e. a static IndexReader), this
+   * method can be overridden to do nothing.
+   *
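+   * For example (an illustrative override, not generally recommended):
+   *
+   *   protected void purgeFieldCache(FieldCache fc) {
+   *     // no-op: deliberately keep FieldCache entries alive across tests
+   *   }
+   *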

+   * @see FieldCache#purgeAllCaches()
+   */
+  protected void purgeFieldCache(final FieldCache fc) {
+    fc.purgeAllCaches();
+  }
+
+  protected String getTestLabel() {
+    return getClass().getName() + "." + getName();
+  }
+
   protected void tearDown() throws Exception {
-    if (ConcurrentMergeScheduler.anyUnhandledExceptions()) {
-      // Clear the failure so that we don't just keep
-      // failing subsequent test cases
-      ConcurrentMergeScheduler.clearUnhandledExceptions();
-      fail("ConcurrentMergeScheduler hit unhandled exceptions");
+    try {
+      // this isn't as useful as calling it directly from the scope where the
+      // index readers are used, because they could be gc'ed just before
+      // tearDown is called.
+      // But it's better than nothing.
+      assertSaneFieldCaches(getTestLabel());
+
+      if (ConcurrentMergeScheduler.anyUnhandledExceptions()) {
+        // Clear the failure so that we don't just keep
+        // failing subsequent test cases
+        ConcurrentMergeScheduler.clearUnhandledExceptions();
+        fail("ConcurrentMergeScheduler hit unhandled exceptions");
+      }
+    } finally {
+      purgeFieldCache(FieldCache.DEFAULT);
+    }
   }
+
+  /**
+   * Asserts that FieldCacheSanityChecker does not detect any
+   * problems with FieldCache.DEFAULT.
+   *

+   * If any problems are found, they are logged to System.err
+   * (along with the msg) when the assertion is thrown.
+   *
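+   * For example, with a msg of "testFoo" (hypothetical), the report produced
+   * via dumpArray looks roughly like:
+   *
+   *   *** BEGIN testFoo: Insane FieldCache usage(s) ***
+   *   (one Insanity.toString() per line)
+   *   *** END testFoo: Insane FieldCache usage(s) ***
+   *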

+ *

+   * This method is called by tearDown after every test method;
+   * however, IndexReaders scoped inside test methods may be garbage
+   * collected prior to this method being called, causing errors to
+   * be overlooked. Tests are encouraged to keep their IndexReaders
+   * scoped at the class level, or to explicitly call this method
+   * directly in the same scope as the IndexReader.
+   *
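+   * For example, calling it explicitly while the reader is still in scope:
+   *
+   *   // inside a test method, after searches that populate the cache
+   *   assertSaneFieldCaches(getTestLabel());
+   *   FieldCache.DEFAULT.purgeAllCaches();
+   *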

+   * @see FieldCacheSanityChecker
+   */
+  protected void assertSaneFieldCaches(final String msg) {
+    final CacheEntry[] entries = FieldCache.DEFAULT.getCacheEntries();
+    Insanity[] insanity = null;
+    try {
+      try {
+        insanity = FieldCacheSanityChecker.checkSanity(entries);
+      } catch (RuntimeException e) {
+        dumpArray(msg + ": FieldCache", entries, System.err);
+        throw e;
+      }
+
+      assertEquals(msg + ": Insane FieldCache usage(s) found",
+                   0, insanity.length);
+      insanity = null;
+    } finally {
+
+      // report this in the event of any exception/failure;
+      // if there was no failure, then insanity will be null anyway
+      if (null != insanity) {
+        dumpArray(msg + ": Insane FieldCache usage(s)", insanity, System.err);
+      }
+
+    }
+  }
+
+  /**
+   * Convenience method for logging an iterator.
+   * @param label String logged before/after the items in the iterator
+   * @param iter Each next() is toString()ed and logged on its own line. If iter is null this is logged differently than an empty iterator.
+   * @param stream Stream to log messages to.
+   */
+  public static void dumpIterator(String label, Iterator iter,
+                                  PrintStream stream) {
+    stream.println("*** BEGIN "+label+" ***");
+    if (null == iter) {
+      stream.println(" ... NULL ...");
+    } else {
+      while (iter.hasNext()) {
+        stream.println(iter.next().toString());
+      }
+    }
+    stream.println("*** END "+label+" ***");
+  }
+
+  /**
+   * Convenience method for logging an array. Wraps the array in an iterator and delegates.
+   * @see #dumpIterator(String,Iterator,PrintStream)
+   */
+  public static void dumpArray(String label, Object[] objs,
+                               PrintStream stream) {
+    Iterator iter = (null == objs) ? null : Arrays.asList(objs).iterator();
+    dumpIterator(label, iter, stream);
+  }
 
   /**
    * Returns a {@link Random} instance for generating random numbers during the test.
diff --git a/src/test/org/apache/lucene/util/TestFieldCacheSanityChecker.java b/src/test/org/apache/lucene/util/TestFieldCacheSanityChecker.java
new file mode 100644
index 00000000000..ca1dd3ae104
--- /dev/null
+++ b/src/test/org/apache/lucene/util/TestFieldCacheSanityChecker.java
@@ -0,0 +1,181 @@
+package org.apache.lucene.util;
+
+/**
+ * Copyright 2009 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +import org.apache.lucene.analysis.WhitespaceAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.search.FieldCache; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.MultiReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.FieldCacheSanityChecker.Insanity; +import org.apache.lucene.util.FieldCacheSanityChecker.InsanityType; + +import java.io.IOException; + +public class TestFieldCacheSanityChecker extends LuceneTestCase { + + protected IndexReader readerA; + protected IndexReader readerB; + protected IndexReader readerX; + + private static final int NUM_DOCS = 1000; + + protected void setUp() throws Exception { + super.setUp(); + + RAMDirectory dirA = new RAMDirectory(); + RAMDirectory dirB = new RAMDirectory(); + + IndexWriter wA = new IndexWriter(dirA, new WhitespaceAnalyzer(), true, + IndexWriter.MaxFieldLength.LIMITED); + IndexWriter wB = new IndexWriter(dirB, new WhitespaceAnalyzer(), true, + IndexWriter.MaxFieldLength.LIMITED); + + long theLong = Long.MAX_VALUE; + double theDouble = Double.MAX_VALUE; + byte theByte = Byte.MAX_VALUE; + short theShort = Short.MAX_VALUE; + int theInt = Integer.MAX_VALUE; + float theFloat = Float.MAX_VALUE; + for (int i = 0; i < NUM_DOCS; i++){ + Document doc = new Document(); + doc.add(new Field("theLong", String.valueOf(theLong--), Field.Store.NO, Field.Index.NOT_ANALYZED)); + doc.add(new Field("theDouble", String.valueOf(theDouble--), Field.Store.NO, Field.Index.NOT_ANALYZED)); + doc.add(new Field("theByte", String.valueOf(theByte--), Field.Store.NO, Field.Index.NOT_ANALYZED)); + doc.add(new Field("theShort", String.valueOf(theShort--), Field.Store.NO, Field.Index.NOT_ANALYZED)); + doc.add(new Field("theInt", String.valueOf(theInt--), Field.Store.NO, Field.Index.NOT_ANALYZED)); + doc.add(new Field("theFloat", String.valueOf(theFloat--), Field.Store.NO, Field.Index.NOT_ANALYZED)); + if (0 == i % 3) { + wA.addDocument(doc); + } else { + wB.addDocument(doc); + } + } + wA.close(); + wB.close(); + readerA = IndexReader.open(dirA); + readerB = IndexReader.open(dirB); + readerX = new MultiReader(new IndexReader[] { readerA, readerB }); + } + + public void tearDown() throws Exception { + super.tearDown(); + readerA.close(); + readerB.close(); + readerX.close(); + } + + public void testSanity() throws IOException { + FieldCache cache = FieldCache.DEFAULT; + cache.purgeAllCaches(); + + double [] doubles; + int [] ints; + + doubles = cache.getDoubles(readerA, "theDouble"); + doubles = cache.getDoubles(readerA, "theDouble", + FieldCache.DEFAULT_DOUBLE_PARSER); + doubles = cache.getDoubles(readerB, "theDouble", + FieldCache.DEFAULT_DOUBLE_PARSER); + + ints = cache.getInts(readerX, "theInt"); + ints = cache.getInts(readerX, "theInt", + FieldCache.DEFAULT_INT_PARSER); + + // // // + + Insanity[] insanity = + FieldCacheSanityChecker.checkSanity(cache.getCacheEntries()); + + if (0 < insanity.length) + dumpArray(getTestLabel() + " INSANITY", insanity, System.err); + + assertEquals("shouldn't be any cache insanity", 0, insanity.length); + cache.purgeAllCaches(); + } + + public void testInsanity1() throws IOException { + FieldCache cache = FieldCache.DEFAULT; + cache.purgeAllCaches(); + + int [] ints; + String [] strings; + byte [] bytes; + + ints = cache.getInts(readerX, "theInt", FieldCache.DEFAULT_INT_PARSER); + strings = 
cache.getStrings(readerX, "theInt"); + + // this one is ok + bytes = cache.getBytes(readerX, "theByte"); + + // // // + + Insanity[] insanity = + FieldCacheSanityChecker.checkSanity(cache.getCacheEntries()); + + assertEquals("wrong number of cache errors", 1, insanity.length); + assertEquals("wrong type of cache error", + InsanityType.VALUEMISMATCH, + insanity[0].getType()); + assertEquals("wrong number of entries in cache error", 2, + insanity[0].getCacheEntries().length); + + // we expect bad things, don't let tearDown complain about them + cache.purgeAllCaches(); + } + + public void testInsanity2() throws IOException { + FieldCache cache = FieldCache.DEFAULT; + cache.purgeAllCaches(); + + String [] strings; + byte [] bytes; + + strings = cache.getStrings(readerA, "theString"); + strings = cache.getStrings(readerB, "theString"); + strings = cache.getStrings(readerX, "theString"); + + // this one is ok + bytes = cache.getBytes(readerX, "theByte"); + + + // // // + + Insanity[] insanity = + FieldCacheSanityChecker.checkSanity(cache.getCacheEntries()); + + assertEquals("wrong number of cache errors", 1, insanity.length); + assertEquals("wrong type of cache error", + InsanityType.SUBREADER, + insanity[0].getType()); + assertEquals("wrong number of entries in cache error", 3, + insanity[0].getCacheEntries().length); + + // we expect bad things, don't let tearDown complain about them + cache.purgeAllCaches(); + } + + public void testInsanity3() throws IOException { + + // :TODO: subreader tree walking is really hairy ... add more crazy tests. + } + +} diff --git a/src/test/org/apache/lucene/util/TestRamUsageEstimator.java b/src/test/org/apache/lucene/util/TestRamUsageEstimator.java new file mode 100644 index 00000000000..124ee6852d1 --- /dev/null +++ b/src/test/org/apache/lucene/util/TestRamUsageEstimator.java @@ -0,0 +1,57 @@ +package org.apache.lucene.util; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+import junit.framework.TestCase;
+
+public class TestRamUsageEstimator extends TestCase {
+
+  public void testBasic() {
+    // use new String(...) so the measured Strings are not interned literals;
+    // RamUsageEstimator skips interned Strings by default
+    String string = new String("test str");
+    RamUsageEstimator rue = new RamUsageEstimator();
+    long size = rue.estimateRamUsage(string);
+    // sizes are printed for manual inspection; this test makes no assertions
+    System.out.println("size:" + size);
+
+    string = new String("test strin");
+    size = rue.estimateRamUsage(string);
+    System.out.println("size:" + size);
+
+    // nested objects are followed recursively
+    Holder holder = new Holder();
+    holder.holder = new Holder("string2", 5000L);
+    size = rue.estimateRamUsage(holder);
+    System.out.println("size:" + size);
+
+    // arrays are sized element by element
+    String[] strings = new String[]{new String("test strin"), new String("hollow"), new String("catchmaster")};
+    size = rue.estimateRamUsage(strings);
+    System.out.println("size:" + size);
+  }
+
+  private static final class Holder {
+    long field1 = 5000L;
+    String name = "name";
+    Holder holder;
+
+    Holder() {
+    }
+
+    Holder(String name, long field1) {
+      this.name = name;
+      this.field1 = field1;
+    }
+  }
+}