diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 72d47e45092..66813446b07 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -231,6 +231,9 @@ New features
 
 * LUCENE-2648: PackedInts.Iterator now supports advancing by more than a
   single ordinal. (Simon Willnauer)
+
+* LUCENE-2649: Objects in the FieldCache can optionally store Bits
+  that mark which docs have real values in the native[] (ryan)
 
 Optimizations
 
diff --git a/lucene/src/java/org/apache/lucene/search/FieldCache.java b/lucene/src/java/org/apache/lucene/search/FieldCache.java
index df0359efca0..169a0e46c0e 100644
--- a/lucene/src/java/org/apache/lucene/search/FieldCache.java
+++ b/lucene/src/java/org/apache/lucene/search/FieldCache.java
@@ -19,6 +19,8 @@ package org.apache.lucene.search;
 
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.cache.EntryCreator;
+import org.apache.lucene.search.cache.CachedArray.*;
 import org.apache.lucene.util.NumericUtils;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.BytesRef;
@@ -46,6 +48,14 @@ public interface FieldCache {
     Object value;
   }
 
+  /**
+   * Hack: When thrown from a Parser (NUMERIC_UTILS_* ones), this stops
+   * processing terms and returns the current FieldCache
+   * array.
+   */
+  public static final class StopFillCacheException extends RuntimeException {
+  }
+
   /**
    * Marker interface as super-interface to all parsers. It
    * is used to specify a custom parser to {@link
@@ -314,6 +324,19 @@ public interface FieldCache {
   public byte[] getBytes (IndexReader reader, String field, ByteParser parser)
   throws IOException;
 
+  /** Checks the internal cache for an appropriate entry, and if none is found,
+   * reads the terms in field as bytes and returns an array of
+   * size reader.maxDoc() of the value each document has in the
+   * given field.
+   * @param reader Used to get field values.
+   * @param field Which field contains the bytes.
+   * @param creator Used to make the ByteValues
+   * @return The values in the given field for each document.
+   * @throws IOException If any error occurs.
+   */
+  public ByteValues getBytes(IndexReader reader, String field, EntryCreator<ByteValues> creator ) throws IOException;
+
+
   /** Checks the internal cache for an appropriate entry, and if none is
    * found, reads the terms in field as shorts and returns an array
    * of size reader.maxDoc() of the value each document
@@ -338,6 +361,20 @@
    */
   public short[] getShorts (IndexReader reader, String field, ShortParser parser)
   throws IOException;
+
+
+  /** Checks the internal cache for an appropriate entry, and if none is found,
+   * reads the terms in field as shorts and returns an array of
+   * size reader.maxDoc() of the value each document has in the
+   * given field.
+   * @param reader Used to get field values.
+   * @param field Which field contains the shorts.
+   * @param creator Computes short for string values.
+   * @return The values in the given field for each document.
+   * @throws IOException If any error occurs.
+   */
+  public ShortValues getShorts(IndexReader reader, String field, EntryCreator<ShortValues> creator ) throws IOException;
+
   /** Checks the internal cache for an appropriate entry, and if none is
    * found, reads the terms in field as integers and returns an array
    * of size reader.maxDoc() of the value each document
@@ -364,6 +401,19 @@
   public int[] getInts (IndexReader reader, String field, IntParser parser)
   throws IOException;
 
+  /** Checks the internal cache for an appropriate entry, and if none is found,
+   * reads the terms in field as integers and returns an array of
+   * size reader.maxDoc() of the value each document has in the
+   * given field.
+   * @param reader Used to get field values.
+   * @param field Which field contains the integers.
+   * @param creator Computes integer for string values.
+   * @return The values in the given field for each document.
+   * @throws IOException If any error occurs.
+   */
+  public IntValues getInts(IndexReader reader, String field, EntryCreator<IntValues> creator ) throws IOException;
+
+
   /** Checks the internal cache for an appropriate entry, and if
    * none is found, reads the terms in field as floats and returns an array
    * of size reader.maxDoc() of the value each document
@@ -388,6 +438,19 @@
    */
   public float[] getFloats (IndexReader reader, String field, FloatParser parser)
   throws IOException;
+
+  /** Checks the internal cache for an appropriate entry, and if
+   * none is found, reads the terms in field as floats and returns an array
+   * of size reader.maxDoc() of the value each document
+   * has in the given field.
+   * @param reader Used to get field values.
+   * @param field Which field contains the floats.
+   * @param creator Computes float for string values.
+   * @return The values in the given field for each document.
+   * @throws IOException If any error occurs.
+   */
+  public FloatValues getFloats(IndexReader reader, String field, EntryCreator<FloatValues> creator ) throws IOException;
+
   /**
    * Checks the internal cache for an appropriate entry, and if none is
@@ -418,6 +481,20 @@
   public long[] getLongs(IndexReader reader, String field, LongParser parser)
   throws IOException;
 
+  /**
+   * Checks the internal cache for an appropriate entry, and if none is found,
+   * reads the terms in field as longs and returns an array of
+   * size reader.maxDoc() of the value each document has in the
+   * given field.
+   *
+   * @param reader Used to get field values.
+   * @param field Which field contains the longs.
+   * @param creator Computes long for string values.
+   * @return The values in the given field for each document.
+   * @throws IOException If any error occurs.
+   */
+  public LongValues getLongs(IndexReader reader, String field, EntryCreator<LongValues> creator ) throws IOException;
+
   /**
    * Checks the internal cache for an appropriate entry, and if none is
@@ -448,6 +525,21 @@
   public double[] getDoubles(IndexReader reader, String field, DoubleParser parser)
   throws IOException;
 
+  /**
+   * Checks the internal cache for an appropriate entry, and if none is found,
+   * reads the terms in field as doubles and returns an array of
+   * size reader.maxDoc() of the value each document has in the
+   * given field.
+   *
+   * @param reader Used to get field values.
+   * @param field Which field contains the doubles.
+   * @param creator Computes double for string values.
+   * @return The values in the given field for each document.
+   * @throws IOException If any error occurs.
+ */ + public DoubleValues getDoubles(IndexReader reader, String field, EntryCreator creator ) throws IOException; + + /** Returned by {@link #getTerms} */ public abstract static class DocTerms { /** The BytesRef argument must not be null; the method diff --git a/lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java b/lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java index caf71a302ca..13548464028 100644 --- a/lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java +++ b/lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java @@ -19,27 +19,43 @@ package org.apache.lucene.search; import java.io.IOException; import java.io.PrintStream; -import java.util.*; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.WeakHashMap; -import org.apache.lucene.index.*; -import org.apache.lucene.util.PagedBytes; -import org.apache.lucene.util.packed.PackedInts; -import org.apache.lucene.util.packed.GrowableWriter; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.util.Bits; -import org.apache.lucene.util.StringHelper; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.cache.ByteValuesCreator; +import org.apache.lucene.search.cache.DocTermsCreator; +import org.apache.lucene.search.cache.DocTermsIndexCreator; +import org.apache.lucene.search.cache.DoubleValuesCreator; +import org.apache.lucene.search.cache.EntryCreator; +import org.apache.lucene.search.cache.FloatValuesCreator; +import org.apache.lucene.search.cache.IntValuesCreator; +import org.apache.lucene.search.cache.LongValuesCreator; +import org.apache.lucene.search.cache.ShortValuesCreator; +import org.apache.lucene.search.cache.CachedArray.ByteValues; +import org.apache.lucene.search.cache.CachedArray.DoubleValues; +import org.apache.lucene.search.cache.CachedArray.FloatValues; +import org.apache.lucene.search.cache.CachedArray.IntValues; +import org.apache.lucene.search.cache.CachedArray.LongValues; +import org.apache.lucene.search.cache.CachedArray.ShortValues; import org.apache.lucene.util.FieldCacheSanityChecker; +import org.apache.lucene.util.StringHelper; /** * Expert: The default cache implementation, storing all values in memory. * A WeakHashMap is used for storage. * *
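Taken together, the interface additions above pair each parser-based accessor with a creator-based one that can also hand back validity Bits. A caller-side sketch of the new entry points; the field name is made up, and the IntValuesCreator constructor is assumed to mirror the ByteValuesCreator constructors shown later in this patch:

```java
import java.io.IOException;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.cache.CachedArray.IntValues;
import org.apache.lucene.search.cache.CachedArrayCreator;
import org.apache.lucene.search.cache.IntValuesCreator;
import org.apache.lucene.util.Bits;

public class FieldCacheBitsExample {
  /** Counts docs that really have a value in the (hypothetical) field --
   *  something the old int[]-only API could not distinguish from a stored 0. */
  static int countDocsWithValue(IndexReader reader, String field) throws IOException {
    IntValues vals = FieldCache.DEFAULT.getInts(reader, field,
        new IntValuesCreator(field, null, CachedArrayCreator.CACHE_VALUES_AND_BITS));
    Bits valid = vals.valid;           // marks which docs have real values
    int count = 0;
    for (int doc = 0; doc < vals.values.length; doc++) {
      if (valid.get(doc)) {
        count++;                       // vals.values[doc] is meaningful here
      }
    }
    return count;
  }
}
```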

Created: May 19, 2004 4:40:36 PM - * + * + * @lucene.internal -- this is now public so that the tests can use reflection + * to call methods. It will likely be removed without (much) notice. + * * @since lucene 1.4 */ -class FieldCacheImpl implements FieldCache { +public class FieldCacheImpl implements FieldCache { // Made Public so that private Map,Cache> caches; FieldCacheImpl() { @@ -47,16 +63,16 @@ class FieldCacheImpl implements FieldCache { } private synchronized void init() { caches = new HashMap,Cache>(7); - caches.put(Byte.TYPE, new ByteCache(this)); - caches.put(Short.TYPE, new ShortCache(this)); - caches.put(Integer.TYPE, new IntCache(this)); - caches.put(Float.TYPE, new FloatCache(this)); - caches.put(Long.TYPE, new LongCache(this)); - caches.put(Double.TYPE, new DoubleCache(this)); - caches.put(DocTermsIndex.class, new DocTermsIndexCache(this)); - caches.put(DocTerms.class, new DocTermsCache(this)); + caches.put(Byte.TYPE, new Cache(this)); + caches.put(Short.TYPE, new Cache(this)); + caches.put(Integer.TYPE, new Cache(this)); + caches.put(Float.TYPE, new Cache(this)); + caches.put(Long.TYPE, new Cache(this)); + caches.put(Double.TYPE, new Cache(this)); + caches.put(DocTermsIndex.class, new Cache(this)); + caches.put(DocTerms.class, new Cache(this)); } - + public synchronized void purgeAllCaches() { init(); } @@ -70,17 +86,15 @@ class FieldCacheImpl implements FieldCache { public synchronized CacheEntry[] getCacheEntries() { List result = new ArrayList(17); for(final Map.Entry,Cache> cacheEntry: caches.entrySet()) { - final Cache cache = cacheEntry.getValue(); + final Cache cache = cacheEntry.getValue(); final Class cacheType = cacheEntry.getKey(); synchronized(cache.readerCache) { - for (final Map.Entry> readerCacheEntry : cache.readerCache.entrySet()) { - final Object readerKey = readerCacheEntry.getKey(); - if (readerKey == null) continue; - final Map innerCache = readerCacheEntry.getValue(); - for (final Map.Entry mapEntry : innerCache.entrySet()) { - Entry entry = mapEntry.getKey(); + for( Object readerKey : cache.readerCache.keySet() ) { + Map innerCache = cache.readerCache.get(readerKey); + for (final Map.Entry mapEntry : innerCache.entrySet()) { + Entry entry = (Entry)mapEntry.getKey(); result.add(new CacheEntryImpl(readerKey, entry.field, - cacheType, entry.custom, + cacheType, entry.creator, mapEntry.getValue())); } } @@ -93,11 +107,11 @@ class FieldCacheImpl implements FieldCache { private final Object readerKey; private final String fieldName; private final Class cacheType; - private final Object custom; + private final EntryCreator custom; private final Object value; CacheEntryImpl(Object readerKey, String fieldName, Class cacheType, - Object custom, + EntryCreator custom, Object value) { this.readerKey = readerKey; this.fieldName = fieldName; @@ -123,16 +137,8 @@ class FieldCacheImpl implements FieldCache { public Object getValue() { return value; } } - /** - * Hack: When thrown from a Parser (NUMERIC_UTILS_* ones), this stops - * processing terms and returns the current FieldCache - * array. - */ - static final class StopFillCacheException extends RuntimeException { - } - /** Expert: Internal cache. 
*/ - abstract static class Cache { + final static class Cache { Cache() { this.wrapper = null; } @@ -143,10 +149,11 @@ class FieldCacheImpl implements FieldCache { final FieldCache wrapper; - final Map> readerCache = new WeakHashMap>(); - - protected abstract Object createValue(IndexReader reader, Entry key) - throws IOException; + final Map,Object>> readerCache = new WeakHashMap,Object>>(); + + protected Object createValue(IndexReader reader, Entry entryKey) throws IOException { + return entryKey.creator.create( reader ); + } /** Remove this reader from the cache, if present. */ public void purge(IndexReader r) { @@ -156,14 +163,14 @@ class FieldCacheImpl implements FieldCache { } } - public Object get(IndexReader reader, Entry key) throws IOException { - Map innerCache; + public Object get(IndexReader reader, Entry key) throws IOException { + Map,Object> innerCache; Object value; final Object readerKey = reader.getCoreCacheKey(); synchronized (readerCache) { innerCache = readerCache.get(readerKey); if (innerCache == null) { - innerCache = new HashMap(); + innerCache = new HashMap,Object>(); readerCache.put(readerKey, innerCache); value = null; } else { @@ -186,7 +193,7 @@ class FieldCacheImpl implements FieldCache { // Only check if key.custom (the parser) is // non-null; else, we check twice for a single // call to FieldCache.getXXX - if (key.custom != null && wrapper != null) { + if (key.creator != null && wrapper != null) { final PrintStream infoStream = wrapper.getInfoStream(); if (infoStream != null) { printNewInsanity(infoStream, progress.value); @@ -196,6 +203,11 @@ class FieldCacheImpl implements FieldCache { return progress.value; } } + + // Validate new entries + if( key.creator.shouldValidate() ) { + key.creator.validate( (T)value, reader); + } return value; } @@ -218,14 +230,14 @@ class FieldCacheImpl implements FieldCache { } /** Expert: Every composite-key in the internal cache is of this type. */ - static class Entry { + static class Entry { final String field; // which Fieldable - final Object custom; // which custom comparator or parser + final EntryCreator creator; // which custom comparator or parser /** Creates one of these objects for a custom comparator/parser. */ - Entry (String field, Object custom) { + Entry (String field, EntryCreator custom) { this.field = StringHelper.intern(field); - this.custom = custom; + this.creator = custom; } /** Two of these are equal iff they reference the same field and type. */ @@ -234,9 +246,9 @@ class FieldCacheImpl implements FieldCache { if (o instanceof Entry) { Entry other = (Entry) o; if (other.field == field) { - if (other.custom == null) { - if (custom == null) return true; - } else if (other.custom.equals (custom)) { + if (other.creator == null) { + if (creator == null) return true; + } else if (other.creator.equals (creator)) { return true; } } @@ -247,769 +259,129 @@ class FieldCacheImpl implements FieldCache { /** Composes a hashcode based on the field and type. */ @Override public int hashCode() { - return field.hashCode() ^ (custom==null ? 0 : custom.hashCode()); + return field.hashCode() ^ (creator==null ? 
0 : creator.hashCode()); } } // inherit javadocs public byte[] getBytes (IndexReader reader, String field) throws IOException { - return getBytes(reader, field, null); + return getBytes(reader, field, new ByteValuesCreator(field, null)).values; } // inherit javadocs - public byte[] getBytes(IndexReader reader, String field, ByteParser parser) - throws IOException { - return (byte[]) caches.get(Byte.TYPE).get(reader, new Entry(field, parser)); + public byte[] getBytes(IndexReader reader, String field, ByteParser parser) throws IOException { + return getBytes(reader, field, new ByteValuesCreator(field, parser)).values; } - static final class ByteCache extends Cache { - ByteCache(FieldCache wrapper) { - super(wrapper); - } - @Override - protected Object createValue(IndexReader reader, Entry entryKey) - throws IOException { - Entry entry = entryKey; - String field = entry.field; - ByteParser parser = (ByteParser) entry.custom; - if (parser == null) { - return wrapper.getBytes(reader, field, FieldCache.DEFAULT_BYTE_PARSER); - } - final byte[] retArray = new byte[reader.maxDoc()]; - Terms terms = MultiFields.getTerms(reader, field); - if (terms != null) { - final TermsEnum termsEnum = terms.iterator(); - final Bits delDocs = MultiFields.getDeletedDocs(reader); - DocsEnum docs = null; - try { - while(true) { - final BytesRef term = termsEnum.next(); - if (term == null) { - break; - } - final byte termval = parser.parseByte(term); - docs = termsEnum.docs(delDocs, docs); - while (true) { - final int docID = docs.nextDoc(); - if (docID == DocsEnum.NO_MORE_DOCS) { - break; - } - retArray[docID] = termval; - } - } - } catch (StopFillCacheException stop) { - } - } - return retArray; - } + public ByteValues getBytes(IndexReader reader, String field, EntryCreator creator ) throws IOException + { + return (ByteValues)caches.get(Byte.TYPE).get(reader, new Entry(field, creator)); } // inherit javadocs public short[] getShorts (IndexReader reader, String field) throws IOException { - return getShorts(reader, field, null); + return getShorts(reader, field, new ShortValuesCreator(field,null)).values; } // inherit javadocs - public short[] getShorts(IndexReader reader, String field, ShortParser parser) - throws IOException { - return (short[]) caches.get(Short.TYPE).get(reader, new Entry(field, parser)); + public short[] getShorts(IndexReader reader, String field, ShortParser parser) throws IOException { + return getShorts(reader, field, new ShortValuesCreator(field,parser)).values; } - static final class ShortCache extends Cache { - ShortCache(FieldCache wrapper) { - super(wrapper); - } - - @Override - protected Object createValue(IndexReader reader, Entry entryKey) - throws IOException { - Entry entry = entryKey; - String field = entry.field; - ShortParser parser = (ShortParser) entry.custom; - if (parser == null) { - return wrapper.getShorts(reader, field, FieldCache.DEFAULT_SHORT_PARSER); - } - final short[] retArray = new short[reader.maxDoc()]; - Terms terms = MultiFields.getTerms(reader, field); - if (terms != null) { - final TermsEnum termsEnum = terms.iterator(); - final Bits delDocs = MultiFields.getDeletedDocs(reader); - DocsEnum docs = null; - try { - while(true) { - final BytesRef term = termsEnum.next(); - if (term == null) { - break; - } - final short termval = parser.parseShort(term); - docs = termsEnum.docs(delDocs, docs); - while (true) { - final int docID = docs.nextDoc(); - if (docID == DocsEnum.NO_MORE_DOCS) { - break; - } - retArray[docID] = termval; - } - } - } catch 
(StopFillCacheException stop) { - } - } - return retArray; - } + public ShortValues getShorts(IndexReader reader, String field, EntryCreator creator ) throws IOException + { + return (ShortValues)caches.get(Short.TYPE).get(reader, new Entry(field, creator)); } // inherit javadocs public int[] getInts (IndexReader reader, String field) throws IOException { - return getInts(reader, field, null); + return getInts(reader, field, new IntValuesCreator( field, null )).values; } // inherit javadocs - public int[] getInts(IndexReader reader, String field, IntParser parser) - throws IOException { - return (int[]) caches.get(Integer.TYPE).get(reader, new Entry(field, parser)); + public int[] getInts(IndexReader reader, String field, IntParser parser) throws IOException { + return getInts(reader, field, new IntValuesCreator( field, parser )).values; } - static final class IntCache extends Cache { - IntCache(FieldCache wrapper) { - super(wrapper); - } - - @Override - protected Object createValue(IndexReader reader, Entry entryKey) - throws IOException { - Entry entry = entryKey; - String field = entry.field; - IntParser parser = (IntParser) entry.custom; - if (parser == null) { - try { - return wrapper.getInts(reader, field, DEFAULT_INT_PARSER); - } catch (NumberFormatException ne) { - return wrapper.getInts(reader, field, NUMERIC_UTILS_INT_PARSER); - } - } - int[] retArray = null; - - Terms terms = MultiFields.getTerms(reader, field); - if (terms != null) { - final TermsEnum termsEnum = terms.iterator(); - final Bits delDocs = MultiFields.getDeletedDocs(reader); - DocsEnum docs = null; - try { - while(true) { - final BytesRef term = termsEnum.next(); - if (term == null) { - break; - } - final int termval = parser.parseInt(term); - if (retArray == null) { - // late init so numeric fields don't double allocate - retArray = new int[reader.maxDoc()]; - } - - docs = termsEnum.docs(delDocs, docs); - while (true) { - final int docID = docs.nextDoc(); - if (docID == DocsEnum.NO_MORE_DOCS) { - break; - } - retArray[docID] = termval; - } - } - } catch (StopFillCacheException stop) { - } - } - - if (retArray == null) { - // no values - retArray = new int[reader.maxDoc()]; - } - return retArray; - } + public IntValues getInts(IndexReader reader, String field, EntryCreator creator ) throws IOException + { + return (IntValues)caches.get(Integer.TYPE).get(reader, new Entry(field, creator)); } - - + // inherit javadocs - public float[] getFloats (IndexReader reader, String field) - throws IOException { - return getFloats(reader, field, null); + public float[] getFloats (IndexReader reader, String field) throws IOException { + return getFloats(reader, field, new FloatValuesCreator( field, null ) ).values; } // inherit javadocs - public float[] getFloats(IndexReader reader, String field, FloatParser parser) - throws IOException { - - return (float[]) caches.get(Float.TYPE).get(reader, new Entry(field, parser)); + public float[] getFloats(IndexReader reader, String field, FloatParser parser) throws IOException { + return getFloats(reader, field, new FloatValuesCreator( field, parser ) ).values; } - static final class FloatCache extends Cache { - FloatCache(FieldCache wrapper) { - super(wrapper); - } - - @Override - protected Object createValue(IndexReader reader, Entry entryKey) - throws IOException { - Entry entry = entryKey; - String field = entry.field; - FloatParser parser = (FloatParser) entry.custom; - if (parser == null) { - try { - return wrapper.getFloats(reader, field, DEFAULT_FLOAT_PARSER); - } catch 
(NumberFormatException ne) { - return wrapper.getFloats(reader, field, NUMERIC_UTILS_FLOAT_PARSER); - } - } - float[] retArray = null; - - Terms terms = MultiFields.getTerms(reader, field); - if (terms != null) { - final TermsEnum termsEnum = terms.iterator(); - final Bits delDocs = MultiFields.getDeletedDocs(reader); - DocsEnum docs = null; - try { - while(true) { - final BytesRef term = termsEnum.next(); - if (term == null) { - break; - } - final float termval = parser.parseFloat(term); - if (retArray == null) { - // late init so numeric fields don't double allocate - retArray = new float[reader.maxDoc()]; - } - - docs = termsEnum.docs(delDocs, docs); - while (true) { - final int docID = docs.nextDoc(); - if (docID == DocsEnum.NO_MORE_DOCS) { - break; - } - retArray[docID] = termval; - } - } - } catch (StopFillCacheException stop) { - } - } - - if (retArray == null) { - // no values - retArray = new float[reader.maxDoc()]; - } - return retArray; - } + public FloatValues getFloats(IndexReader reader, String field, EntryCreator creator ) throws IOException + { + return (FloatValues)caches.get(Float.TYPE).get(reader, new Entry(field, creator)); } - public long[] getLongs(IndexReader reader, String field) throws IOException { - return getLongs(reader, field, null); + return getLongs(reader, field, new LongValuesCreator( field, null ) ).values; } // inherit javadocs - public long[] getLongs(IndexReader reader, String field, FieldCache.LongParser parser) - throws IOException { - return (long[]) caches.get(Long.TYPE).get(reader, new Entry(field, parser)); + public long[] getLongs(IndexReader reader, String field, FieldCache.LongParser parser) throws IOException { + return getLongs(reader, field, new LongValuesCreator( field, parser ) ).values; } - static final class LongCache extends Cache { - LongCache(FieldCache wrapper) { - super(wrapper); - } - - @Override - protected Object createValue(IndexReader reader, Entry entry) - throws IOException { - String field = entry.field; - FieldCache.LongParser parser = (FieldCache.LongParser) entry.custom; - if (parser == null) { - try { - return wrapper.getLongs(reader, field, DEFAULT_LONG_PARSER); - } catch (NumberFormatException ne) { - return wrapper.getLongs(reader, field, NUMERIC_UTILS_LONG_PARSER); - } - } - long[] retArray = null; - - Terms terms = MultiFields.getTerms(reader, field); - if (terms != null) { - final TermsEnum termsEnum = terms.iterator(); - final Bits delDocs = MultiFields.getDeletedDocs(reader); - DocsEnum docs = null; - try { - while(true) { - final BytesRef term = termsEnum.next(); - if (term == null) { - break; - } - final long termval = parser.parseLong(term); - if (retArray == null) { - // late init so numeric fields don't double allocate - retArray = new long[reader.maxDoc()]; - } - - docs = termsEnum.docs(delDocs, docs); - while (true) { - final int docID = docs.nextDoc(); - if (docID == DocsEnum.NO_MORE_DOCS) { - break; - } - retArray[docID] = termval; - } - } - } catch (StopFillCacheException stop) { - } - } - - if (retArray == null) { - // no values - retArray = new long[reader.maxDoc()]; - } - return retArray; - } + public LongValues getLongs(IndexReader reader, String field, EntryCreator creator ) throws IOException + { + return (LongValues)caches.get(Long.TYPE).get(reader, new Entry(field, creator)); + } + + // inherit javadocs + public double[] getDoubles(IndexReader reader, String field) throws IOException { + return getDoubles(reader, field, new DoubleValuesCreator( field, null ) ).values; } // inherit javadocs - 
public double[] getDoubles(IndexReader reader, String field) - throws IOException { - return getDoubles(reader, field, null); + public double[] getDoubles(IndexReader reader, String field, FieldCache.DoubleParser parser) throws IOException { + return getDoubles(reader, field, new DoubleValuesCreator( field, parser ) ).values; } - // inherit javadocs - public double[] getDoubles(IndexReader reader, String field, FieldCache.DoubleParser parser) - throws IOException { - return (double[]) caches.get(Double.TYPE).get(reader, new Entry(field, parser)); + public DoubleValues getDoubles(IndexReader reader, String field, EntryCreator creator ) throws IOException + { + return (DoubleValues)caches.get(Double.TYPE).get(reader, new Entry(field, creator)); } - static final class DoubleCache extends Cache { - DoubleCache(FieldCache wrapper) { - super(wrapper); - } - - @Override - protected Object createValue(IndexReader reader, Entry entryKey) - throws IOException { - Entry entry = entryKey; - String field = entry.field; - FieldCache.DoubleParser parser = (FieldCache.DoubleParser) entry.custom; - if (parser == null) { - try { - return wrapper.getDoubles(reader, field, DEFAULT_DOUBLE_PARSER); - } catch (NumberFormatException ne) { - return wrapper.getDoubles(reader, field, NUMERIC_UTILS_DOUBLE_PARSER); - } - } - double[] retArray = null; - - Terms terms = MultiFields.getTerms(reader, field); - if (terms != null) { - final TermsEnum termsEnum = terms.iterator(); - final Bits delDocs = MultiFields.getDeletedDocs(reader); - DocsEnum docs = null; - try { - while(true) { - final BytesRef term = termsEnum.next(); - if (term == null) { - break; - } - final double termval = parser.parseDouble(term); - if (retArray == null) { - // late init so numeric fields don't double allocate - retArray = new double[reader.maxDoc()]; - } - - docs = termsEnum.docs(delDocs, docs); - while (true) { - final int docID = docs.nextDoc(); - if (docID == DocsEnum.NO_MORE_DOCS) { - break; - } - retArray[docID] = termval; - } - } - } catch (StopFillCacheException stop) { - } - } - if (retArray == null) // no values - retArray = new double[reader.maxDoc()]; - return retArray; - } + public DocTermsIndex getTermsIndex(IndexReader reader, String field) throws IOException { + return getTermsIndex(reader, field, new DocTermsIndexCreator( field ) ); } - public static class DocTermsIndexImpl extends DocTermsIndex { - private final PagedBytes.Reader bytes; - private final PackedInts.Reader termOrdToBytesOffset; - private final PackedInts.Reader docToTermOrd; - private final int numOrd; - - public DocTermsIndexImpl(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, PackedInts.Reader docToTermOrd, int numOrd) { - this.bytes = bytes; - this.docToTermOrd = docToTermOrd; - this.termOrdToBytesOffset = termOrdToBytesOffset; - this.numOrd = numOrd; - } - - @Override - public PackedInts.Reader getDocToOrd() { - return docToTermOrd; - } - - @Override - public int numOrd() { - return numOrd; - } - - @Override - public int getOrd(int docID) { - return (int) docToTermOrd.get(docID); - } - - @Override - public int size() { - return docToTermOrd.size(); - } - - @Override - public BytesRef lookup(int ord, BytesRef ret) { - return bytes.fillUsingLengthPrefix(ret, termOrdToBytesOffset.get(ord)); - } - - @Override - public TermsEnum getTermsEnum() { - return this.new DocTermsIndexEnum(); - } - - class DocTermsIndexEnum extends TermsEnum { - int currentOrd; - int currentBlockNumber; - int end; // end position in the current block - final byte[][] 
blocks; - final int[] blockEnds; - - final BytesRef term = new BytesRef(); - - public DocTermsIndexEnum() { - currentOrd = 0; - currentBlockNumber = 0; - blocks = bytes.getBlocks(); - blockEnds = bytes.getBlockEnds(); - currentBlockNumber = bytes.fillUsingLengthPrefix2(term, termOrdToBytesOffset.get(0)); - end = blockEnds[currentBlockNumber]; - } - - @Override - public SeekStatus seek(BytesRef text, boolean useCache) throws IOException { - // TODO - we can support with binary search - throw new UnsupportedOperationException(); - } - - @Override - public SeekStatus seek(long ord) throws IOException { - assert(ord >= 0 && ord <= numOrd); - // TODO: if gap is small, could iterate from current position? Or let user decide that? - currentBlockNumber = bytes.fillUsingLengthPrefix2(term, termOrdToBytesOffset.get((int)ord)); - end = blockEnds[currentBlockNumber]; - currentOrd = (int)ord; - return SeekStatus.FOUND; - } - - @Override - public BytesRef next() throws IOException { - int start = term.offset + term.length; - if (start >= end) { - // switch byte blocks - if (currentBlockNumber +1 >= blocks.length) { - return null; - } - currentBlockNumber++; - term.bytes = blocks[currentBlockNumber]; - end = blockEnds[currentBlockNumber]; - start = 0; - if (end<=0) return null; // special case of empty last array - } - - currentOrd++; - - byte[] block = term.bytes; - if ((block[start] & 128) == 0) { - term.length = block[start]; - term.offset = start+1; - } else { - term.length = (((int) (block[start] & 0x7f)) << 8) | (block[1+start] & 0xff); - term.offset = start+2; - } - - return term; - } - - @Override - public BytesRef term() throws IOException { - return term; - } - - @Override - public long ord() throws IOException { - return currentOrd; - } - - @Override - public int docFreq() { - throw new UnsupportedOperationException(); - } - - @Override - public DocsEnum docs(Bits skipDocs, DocsEnum reuse) throws IOException { - throw new UnsupportedOperationException(); - } - - @Override - public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException { - throw new UnsupportedOperationException(); - } - - @Override - public Comparator getComparator() throws IOException { - throw new UnsupportedOperationException(); - } - } + public DocTermsIndex getTermsIndex(IndexReader reader, String field, boolean fasterButMoreRAM) throws IOException { + return getTermsIndex(reader, field, new DocTermsIndexCreator( field, + fasterButMoreRAM ? 
DocTermsIndexCreator.FASTER_BUT_MORE_RAM : 0 ) ); } - private static boolean DEFAULT_FASTER_BUT_MORE_RAM = true; - - public DocTermsIndex getTermsIndex(IndexReader reader, String field) throws IOException { - return getTermsIndex(reader, field, DEFAULT_FASTER_BUT_MORE_RAM); - } - - public DocTermsIndex getTermsIndex(IndexReader reader, String field, boolean fasterButMoreRAM) throws IOException { - return (DocTermsIndex) caches.get(DocTermsIndex.class).get(reader, new Entry(field, Boolean.valueOf(fasterButMoreRAM))); - } - - static class DocTermsIndexCache extends Cache { - DocTermsIndexCache(FieldCache wrapper) { - super(wrapper); - } - - @Override - protected Object createValue(IndexReader reader, Entry entryKey) - throws IOException { - - String field = StringHelper.intern(entryKey.field); - Terms terms = MultiFields.getTerms(reader, field); - - final boolean fasterButMoreRAM = ((Boolean) entryKey.custom).booleanValue(); - - final PagedBytes bytes = new PagedBytes(15); - - int startBytesBPV; - int startTermsBPV; - int startNumUniqueTerms; - - int maxDoc = reader.maxDoc(); - final int termCountHardLimit; - if (maxDoc == Integer.MAX_VALUE) { - termCountHardLimit = Integer.MAX_VALUE; - } else { - termCountHardLimit = maxDoc+1; - } - - if (terms != null) { - // Try for coarse estimate for number of bits; this - // should be an underestimate most of the time, which - // is fine -- GrowableWriter will reallocate as needed - long numUniqueTerms = 0; - try { - numUniqueTerms = terms.getUniqueTermCount(); - } catch (UnsupportedOperationException uoe) { - numUniqueTerms = -1; - } - if (numUniqueTerms != -1) { - - if (numUniqueTerms > termCountHardLimit) { - // app is misusing the API (there is more than - // one term per doc); in this case we make best - // effort to load what we can (see LUCENE-2142) - numUniqueTerms = termCountHardLimit; - } - - startBytesBPV = PackedInts.bitsRequired(numUniqueTerms*4); - startTermsBPV = PackedInts.bitsRequired(numUniqueTerms); - - startNumUniqueTerms = (int) numUniqueTerms; - } else { - startBytesBPV = 1; - startTermsBPV = 1; - startNumUniqueTerms = 1; - } - } else { - startBytesBPV = 1; - startTermsBPV = 1; - startNumUniqueTerms = 1; - } - - GrowableWriter termOrdToBytesOffset = new GrowableWriter(startBytesBPV, 1+startNumUniqueTerms, fasterButMoreRAM); - final GrowableWriter docToTermOrd = new GrowableWriter(startTermsBPV, reader.maxDoc(), fasterButMoreRAM); - - // 0 is reserved for "unset" - bytes.copyUsingLengthPrefix(new BytesRef()); - int termOrd = 1; - - if (terms != null) { - final TermsEnum termsEnum = terms.iterator(); - final Bits delDocs = MultiFields.getDeletedDocs(reader); - DocsEnum docs = null; - - while(true) { - final BytesRef term = termsEnum.next(); - if (term == null) { - break; - } - if (termOrd >= termCountHardLimit) { - break; - } - - if (termOrd == termOrdToBytesOffset.size()) { - // NOTE: this code only runs if the incoming - // reader impl doesn't implement - // getUniqueTermCount (which should be uncommon) - termOrdToBytesOffset = termOrdToBytesOffset.resize(ArrayUtil.oversize(1+termOrd, 1)); - } - termOrdToBytesOffset.set(termOrd, bytes.copyUsingLengthPrefix(term)); - docs = termsEnum.docs(delDocs, docs); - while (true) { - final int docID = docs.nextDoc(); - if (docID == DocsEnum.NO_MORE_DOCS) { - break; - } - docToTermOrd.set(docID, termOrd); - } - termOrd++; - } - - if (termOrdToBytesOffset.size() > termOrd) { - termOrdToBytesOffset = termOrdToBytesOffset.resize(termOrd); - } - } - - // maybe an int-only impl? 
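A note on the getTermsIndex/getTerms rewiring a few lines up, before the removed cache classes continue below: the boolean fasterButMoreRAM variants now just translate into creator flags, and because the creators leave flags out of their cache keys, both spellings should resolve to the same cache entry. An illustration with a hypothetical field name (assuming SimpleEntryKey defines equality over its parts):

```java
static void sameEntry(IndexReader reader) throws IOException {
  // Both lookups key on DocTermsIndexCreator + the field name only, so the
  // second call is a cache hit even though its RAM/speed flag differs.
  FieldCache.DocTermsIndex a = FieldCache.DEFAULT.getTermsIndex(reader, "category");        // FASTER_BUT_MORE_RAM default
  FieldCache.DocTermsIndex b = FieldCache.DEFAULT.getTermsIndex(reader, "category", false); // flags == 0
  assert a == b; // same cached instance under the shared key
}
```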
- return new DocTermsIndexImpl(bytes.freeze(true), termOrdToBytesOffset.getMutable(), docToTermOrd.getMutable(), termOrd); - } - } - - private static class DocTermsImpl extends DocTerms { - private final PagedBytes.Reader bytes; - private final PackedInts.Reader docToOffset; - - public DocTermsImpl(PagedBytes.Reader bytes, PackedInts.Reader docToOffset) { - this.bytes = bytes; - this.docToOffset = docToOffset; - } - - @Override - public int size() { - return docToOffset.size(); - } - - @Override - public boolean exists(int docID) { - return docToOffset.get(docID) == 0; - } - - @Override - public BytesRef getTerm(int docID, BytesRef ret) { - final int pointer = (int) docToOffset.get(docID); - return bytes.fillUsingLengthPrefix(ret, pointer); - } + public DocTermsIndex getTermsIndex(IndexReader reader, String field, EntryCreator creator) throws IOException + { + return (DocTermsIndex)caches.get(DocTermsIndex.class).get(reader, new Entry(field, creator)); } // TODO: this if DocTermsIndex was already created, we // should share it... public DocTerms getTerms(IndexReader reader, String field) throws IOException { - return getTerms(reader, field, DEFAULT_FASTER_BUT_MORE_RAM); + return getTerms(reader, field, new DocTermsCreator( field ) ); } public DocTerms getTerms(IndexReader reader, String field, boolean fasterButMoreRAM) throws IOException { - return (DocTerms) caches.get(DocTerms.class).get(reader, new Entry(field, Boolean.valueOf(fasterButMoreRAM))); + return getTerms(reader, field, new DocTermsCreator( field, + fasterButMoreRAM ? DocTermsCreator.FASTER_BUT_MORE_RAM : 0 ) ); } - static final class DocTermsCache extends Cache { - DocTermsCache(FieldCache wrapper) { - super(wrapper); - } - - @Override - protected Object createValue(IndexReader reader, Entry entryKey) - throws IOException { - - String field = StringHelper.intern(entryKey.field); - Terms terms = MultiFields.getTerms(reader, field); - - final boolean fasterButMoreRAM = ((Boolean) entryKey.custom).booleanValue(); - - final int termCountHardLimit = reader.maxDoc(); - - // Holds the actual term data, expanded. 
- final PagedBytes bytes = new PagedBytes(15); - - int startBPV; - - if (terms != null) { - // Try for coarse estimate for number of bits; this - // should be an underestimate most of the time, which - // is fine -- GrowableWriter will reallocate as needed - long numUniqueTerms = 0; - try { - numUniqueTerms = terms.getUniqueTermCount(); - } catch (UnsupportedOperationException uoe) { - numUniqueTerms = -1; - } - if (numUniqueTerms != -1) { - if (numUniqueTerms > termCountHardLimit) { - numUniqueTerms = termCountHardLimit; - } - startBPV = PackedInts.bitsRequired(numUniqueTerms*4); - } else { - startBPV = 1; - } - } else { - startBPV = 1; - } - - final GrowableWriter docToOffset = new GrowableWriter(startBPV, reader.maxDoc(), fasterButMoreRAM); - - // pointer==0 means not set - bytes.copyUsingLengthPrefix(new BytesRef()); - - if (terms != null) { - int termCount = 0; - final TermsEnum termsEnum = terms.iterator(); - final Bits delDocs = MultiFields.getDeletedDocs(reader); - DocsEnum docs = null; - while(true) { - if (termCount++ == termCountHardLimit) { - // app is misusing the API (there is more than - // one term per doc); in this case we make best - // effort to load what we can (see LUCENE-2142) - break; - } - - final BytesRef term = termsEnum.next(); - if (term == null) { - break; - } - final long pointer = bytes.copyUsingLengthPrefix(term); - docs = termsEnum.docs(delDocs, docs); - while (true) { - final int docID = docs.nextDoc(); - if (docID == DocsEnum.NO_MORE_DOCS) { - break; - } - docToOffset.set(docID, pointer); - } - } - } - - // maybe an int-only impl? - return new DocTermsImpl(bytes.freeze(true), docToOffset.getMutable()); - } + public DocTerms getTerms(IndexReader reader, String field, EntryCreator creator) throws IOException + { + return (DocTerms)caches.get(DocTerms.class).get(reader, new Entry(field, creator)); } + private volatile PrintStream infoStream; public void setInfoStream(PrintStream stream) { diff --git a/lucene/src/java/org/apache/lucene/search/cache/ByteValuesCreator.java b/lucene/src/java/org/apache/lucene/search/cache/ByteValuesCreator.java new file mode 100644 index 00000000000..59d5d4d9c2f --- /dev/null +++ b/lucene/src/java/org/apache/lucene/search/cache/ByteValuesCreator.java @@ -0,0 +1,131 @@ +package org.apache.lucene.search.cache; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; + +import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.MultiFields; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.FieldCache; +import org.apache.lucene.search.FieldCache.ByteParser; +import org.apache.lucene.search.cache.CachedArray.ByteValues; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.OpenBitSet; + +public class ByteValuesCreator extends CachedArrayCreator +{ + protected ByteParser parser; + + public ByteValuesCreator( String field, ByteParser parser, int options ) + { + super( field, options ); + this.parser = parser; + } + + public ByteValuesCreator( String field, ByteParser parser ) + { + super( field ); + this.parser = parser; + } + + @Override + public Class getArrayType() { + return Byte.class; + } + + //-------------------------------------------------------------------------------- + //-------------------------------------------------------------------------------- + + @Override + public ByteValues create(IndexReader reader) throws IOException { + return validate( new ByteValues(), reader ); + } + + @Override + public ByteValues validate(ByteValues entry, IndexReader reader) throws IOException { + boolean ok = false; + if( hasOption(OPTION_CACHE_VALUES) ) { + ok = true; + if( entry.values == null ) { + fillByteValues(entry, reader, field); + } + } + if( hasOption(OPTION_CACHE_BITS) ) { + ok = true; + if( entry.valid == null ) { + fillValidBits(entry, reader, field); + } + } + if( !ok ) { + throw new RuntimeException( "the config must cache values and/or bits" ); + } + return entry; + } + + protected void fillByteValues( ByteValues vals, IndexReader reader, String field ) throws IOException + { + if( parser == null ) { + parser = FieldCache.DEFAULT_BYTE_PARSER; + } + assertSameParserAndResetCounts(vals, parser); + + Terms terms = MultiFields.getTerms(reader, field); + int maxDoc = reader.maxDoc(); + vals.values = new byte[maxDoc]; + if (terms != null) { + final TermsEnum termsEnum = terms.iterator(); + final Bits delDocs = MultiFields.getDeletedDocs(reader); + OpenBitSet validBits = (hasOption(OPTION_CACHE_BITS)) ? 
new OpenBitSet( maxDoc ) : null; + DocsEnum docs = null; + try { + while(true) { + final BytesRef term = termsEnum.next(); + if (term == null) { + break; + } + final byte termval = parser.parseByte(term); + docs = termsEnum.docs(delDocs, docs); + while (true) { + final int docID = docs.nextDoc(); + if (docID == DocIdSetIterator.NO_MORE_DOCS) { + break; + } + vals.values[docID] = termval; + vals.numDocs++; + if( validBits != null ) { + validBits.set( docID ); + } + } + vals.numTerms++; + } + } catch (FieldCache.StopFillCacheException stop) {} + + if( vals.valid == null ) { + vals.valid = checkMatchAllBits( delDocs, validBits, vals.numDocs, maxDoc ); + } + } + if( vals.valid == null && vals.numDocs < 1 ) { + vals.valid = new Bits.MatchNoBits( maxDoc ); + } + } +} \ No newline at end of file diff --git a/lucene/src/java/org/apache/lucene/search/cache/CachedArray.java b/lucene/src/java/org/apache/lucene/search/cache/CachedArray.java new file mode 100644 index 00000000000..3eb51110e4a --- /dev/null +++ b/lucene/src/java/org/apache/lucene/search/cache/CachedArray.java @@ -0,0 +1,78 @@ +package org.apache.lucene.search.cache; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.util.Bits; + +public abstract class CachedArray +{ + public Integer parserHashCode; // a flag to make sure you don't change what you are asking for in subsequent requests + public int numDocs; + public int numTerms; + + /** + * NOTE: these Bits may have false positives for deleted documents. That is, + * Documents that are deleted may be marked as valid but the array value is not. 
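+ * <p>
+ * A consumer that needs exact per-live-doc semantics should therefore
+ * intersect these bits with the reader's deleted docs, e.g. (illustrative):
+ * <pre>
+ *   boolean hasRealValue = arr.valid.get(doc) && (delDocs == null || !delDocs.get(doc));
+ * </pre>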
+ */ + public Bits valid; + + public CachedArray() { + this.parserHashCode = null; + this.numDocs = 0; + this.numTerms = 0; + } + + /** + * @return the native array + */ + public abstract Object getRawArray(); + + //------------------------------------------------------------- + // Concrete Values + //------------------------------------------------------------- + + public static class ByteValues extends CachedArray { + public byte[] values = null; + @Override public byte[] getRawArray() { return values; } + }; + + public static class ShortValues extends CachedArray { + public short[] values = null; + @Override public short[] getRawArray() { return values; } + }; + + public static class IntValues extends CachedArray { + public int[] values = null; + @Override public int[] getRawArray() { return values; } + }; + + public static class FloatValues extends CachedArray { + public float[] values = null; + @Override public float[] getRawArray() { return values; } + }; + + public static class LongValues extends CachedArray { + public long[] values = null; + @Override public long[] getRawArray() { return values; } + }; + + public static class DoubleValues extends CachedArray { + public double[] values = null; + @Override public double[] getRawArray() { return values; } + }; +} diff --git a/lucene/src/java/org/apache/lucene/search/cache/CachedArrayCreator.java b/lucene/src/java/org/apache/lucene/search/cache/CachedArrayCreator.java new file mode 100644 index 00000000000..9e5e23b1c02 --- /dev/null +++ b/lucene/src/java/org/apache/lucene/search/cache/CachedArrayCreator.java @@ -0,0 +1,148 @@ +package org.apache.lucene.search.cache; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.FieldCache.Parser;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.OpenBitSet;
+
+public abstract class CachedArrayCreator<T extends CachedArray> extends EntryCreatorWithOptions<T>
+{
+  public static final int OPTION_VALIDATE = 1;
+  public static final int OPTION_CACHE_VALUES = 2;
+  public static final int OPTION_CACHE_BITS = 4;
+
+  // Composite Options Fields
+  public static final int CACHE_VALUES_AND_BITS = OPTION_CACHE_VALUES ^ OPTION_CACHE_BITS;
+  public static final int CACHE_VALUES_AND_BITS_VALIDATE = OPTION_CACHE_VALUES ^ OPTION_CACHE_BITS ^ OPTION_VALIDATE;
+
+  public String field;
+
+  public CachedArrayCreator( String field )
+  {
+    super( OPTION_CACHE_VALUES ^ OPTION_VALIDATE );
+    if( field == null ) {
+      throw new IllegalArgumentException( "field can not be null" );
+    }
+    this.field = field;
+  }
+
+  public CachedArrayCreator( String field, int flags )
+  {
+    super( flags );
+    if( field == null ) {
+      throw new IllegalArgumentException( "field can not be null" );
+    }
+    this.field = field;
+  }
+
+  /**
+   * Note that the 'flags' are not part of the key -- subsequent calls to the cache
+   * with different options will use the same cache entry.
+   */
+  @Override
+  public EntryKey getCacheKey() {
+    return new SimpleEntryKey( CachedArray.class, getArrayType(), field );
+    //return new Integer( CachedArrayCreator.class.hashCode() ^ getArrayType().hashCode() ^ field.hashCode() );
+  }
+
+  /** Return the type that the array will hold */
+  public abstract Class getArrayType();
+
+  protected void assertSameParserAndResetCounts(T value, Parser parser)
+  {
+    int parserHashCode = parser.hashCode();
+    if( value.parserHashCode != null && value.parserHashCode != parserHashCode ) {
+      throw new RuntimeException( "Parser changed in subsequent call. "
+          +value.parserHashCode+" != "+parserHashCode + " :: " + parser );
+    }
+    value.parserHashCode = parserHashCode;
+    value.numDocs = value.numTerms = 0;
+  }
+
+  /**
+   * Utility function to help check what bits are valid
+   */
+  protected Bits checkMatchAllBits( Bits deleted, OpenBitSet valid, int numDocs, int maxDocs )
+  {
+    if( numDocs != maxDocs ) {
+      if( hasOption( OPTION_CACHE_BITS ) ) {
+        if( deleted == null ) {
+          for( int i=0; i extends EntryCreatorWithOptions
+{
+  public static final int FASTER_BUT_MORE_RAM = 2;
+
+  public String field;
+
+  public DocTermsCreator( String field )
+  {
+    super( FASTER_BUT_MORE_RAM ); // By default turn on FASTER_BUT_MORE_RAM
+    if( field == null ) {
+      throw new IllegalArgumentException( "field can not be null" );
+    }
+    this.field = field;
+  }
+
+  public DocTermsCreator( String field, int flags )
+  {
+    super( flags );
+    if( field == null ) {
+      throw new IllegalArgumentException( "field can not be null" );
+    }
+    this.field = field;
+  }
+
+  @Override
+  public SimpleEntryKey getCacheKey() {
+    return new SimpleEntryKey( DocTermsCreator.class, field );
+  }
+
+  @Override
+  public T create(IndexReader reader) throws IOException {
+
+    String field = StringHelper.intern(this.field); // TODO?? necessary?
+    Terms terms = MultiFields.getTerms(reader, field);
+
+    final boolean fasterButMoreRAM = hasOption( FASTER_BUT_MORE_RAM );
+    final int termCountHardLimit = reader.maxDoc();
+
+    // Holds the actual term data, expanded.
+    final PagedBytes bytes = new PagedBytes(15);
+
+    int startBPV;
+
+    if (terms != null) {
+      // Try for coarse estimate for number of bits; this
+      // should be an underestimate most of the time, which
+      // is fine -- GrowableWriter will reallocate as needed
+      long numUniqueTerms = 0;
+      try {
+        numUniqueTerms = terms.getUniqueTermCount();
+      } catch (UnsupportedOperationException uoe) {
+        numUniqueTerms = -1;
+      }
+      if (numUniqueTerms != -1) {
+        if (numUniqueTerms > termCountHardLimit) {
+          numUniqueTerms = termCountHardLimit;
+        }
+        startBPV = PackedInts.bitsRequired(numUniqueTerms*4);
+      } else {
+        startBPV = 1;
+      }
+    } else {
+      startBPV = 1;
+    }
+
+    final GrowableWriter docToOffset = new GrowableWriter(startBPV, reader.maxDoc(), fasterButMoreRAM);
+
+    // pointer==0 means not set
+    bytes.copyUsingLengthPrefix(new BytesRef());
+
+    if (terms != null) {
+      int termCount = 0;
+      final TermsEnum termsEnum = terms.iterator();
+      final Bits delDocs = MultiFields.getDeletedDocs(reader);
+      DocsEnum docs = null;
+      while(true) {
+        if (termCount++ == termCountHardLimit) {
+          // app is misusing the API (there is more than
+          // one term per doc); in this case we make best
+          // effort to load what we can (see LUCENE-2142)
+          break;
+        }
+
+        final BytesRef term = termsEnum.next();
+        if (term == null) {
+          break;
+        }
+        final long pointer = bytes.copyUsingLengthPrefix(term);
+        docs = termsEnum.docs(delDocs, docs);
+        while (true) {
+          final int docID = docs.nextDoc();
+          if (docID == DocIdSetIterator.NO_MORE_DOCS) {
+            break;
+          }
+          docToOffset.set(docID, pointer);
+        }
+      }
+    }
+
+    // maybe an int-only impl?
+    return (T)new DocTermsImpl(bytes.freeze(true), docToOffset.getMutable());
+  }
+
+  @Override
+  public T validate(T entry, IndexReader reader) throws IOException {
+    // TODO? nothing? perhaps subsequent call with FASTER_BUT_MORE_RAM?
+    return entry;
+  }
+
+  private static class DocTermsImpl extends DocTerms {
+    private final PagedBytes.Reader bytes;
+    private final PackedInts.Reader docToOffset;
+
+    public DocTermsImpl(PagedBytes.Reader bytes, PackedInts.Reader docToOffset) {
+      this.bytes = bytes;
+      this.docToOffset = docToOffset;
+    }
+
+    @Override
+    public int size() {
+      return docToOffset.size();
+    }
+
+    @Override
+    public boolean exists(int docID) {
+      return docToOffset.get(docID) != 0; // offset 0 is the "not set" sentinel written in create()
+    }
+
+    @Override
+    public BytesRef getTerm(int docID, BytesRef ret) {
+      final int pointer = (int) docToOffset.get(docID);
+      return bytes.fillUsingLengthPrefix(ret, pointer);
+    }
+  }
+}
diff --git a/lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java b/lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java
new file mode 100644
index 00000000000..55b725c4e95
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java
@@ -0,0 +1,318 @@
+package org.apache.lucene.search.cache;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
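ByteValuesCreator.validate() earlier in this patch also calls a fillValidBits(entry, reader, field) helper whose body is not visible in this excerpt. A plausible sketch of what such a helper must do -- walk every term's postings and set a bit per matching doc, with no parser involved -- is below; this is an assumption for illustration, not the committed code, and it uses the same imports as ByteValuesCreator:

```java
// Plausible sketch only; the committed helper is not shown in this excerpt.
protected void fillValidBits( CachedArray vals, IndexReader reader, String field ) throws IOException {
  vals.numDocs = vals.numTerms = 0;
  final int maxDoc = reader.maxDoc();
  final OpenBitSet validBits = new OpenBitSet( maxDoc );
  final Terms terms = MultiFields.getTerms( reader, field );
  if( terms != null ) {
    final TermsEnum termsEnum = terms.iterator();
    final Bits delDocs = MultiFields.getDeletedDocs( reader );
    DocsEnum docs = null;
    while( termsEnum.next() != null ) {  // any term marks a doc as having a value; no parsing needed
      docs = termsEnum.docs( delDocs, docs );
      int docID;
      while( (docID = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS ) {
        validBits.set( docID );
        vals.numDocs++;
      }
      vals.numTerms++;
    }
  }
  // the real code may compact to MatchAllBits/MatchNoBits via checkMatchAllBits(...)
  vals.valid = validBits;
}
```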
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.Comparator; + +import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.MultiFields; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.FieldCache.DocTermsIndex; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.PagedBytes; +import org.apache.lucene.util.StringHelper; +import org.apache.lucene.util.packed.GrowableWriter; +import org.apache.lucene.util.packed.PackedInts; + +public class DocTermsIndexCreator extends EntryCreatorWithOptions +{ + public static final int FASTER_BUT_MORE_RAM = 2; + + public String field; + + public DocTermsIndexCreator( String field ) + { + super( FASTER_BUT_MORE_RAM ); // By default turn on FASTER_BUT_MORE_RAM + if( field == null ) { + throw new IllegalArgumentException( "field can not be null" ); + } + this.field = field; + } + + public DocTermsIndexCreator( String field, int flags ) + { + super( flags ); + if( field == null ) { + throw new IllegalArgumentException( "field can not be null" ); + } + this.field = field; + } + + @Override + public EntryKey getCacheKey() { + return new SimpleEntryKey( DocTermsIndexCreator.class, field ); + } + + @Override + public T create(IndexReader reader) throws IOException + { + String field = StringHelper.intern(this.field); // TODO?? necessary? 
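+    // (interning mirrors FieldCacheImpl.Entry, which interns its field name
+    //  so that equals() can compare fields with ==)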
+ Terms terms = MultiFields.getTerms(reader, field); + + final boolean fasterButMoreRAM = hasOption(FASTER_BUT_MORE_RAM); + + final PagedBytes bytes = new PagedBytes(15); + + int startBytesBPV; + int startTermsBPV; + int startNumUniqueTerms; + + int maxDoc = reader.maxDoc(); + final int termCountHardLimit; + if (maxDoc == Integer.MAX_VALUE) { + termCountHardLimit = Integer.MAX_VALUE; + } else { + termCountHardLimit = maxDoc+1; + } + + if (terms != null) { + // Try for coarse estimate for number of bits; this + // should be an underestimate most of the time, which + // is fine -- GrowableWriter will reallocate as needed + long numUniqueTerms = 0; + try { + numUniqueTerms = terms.getUniqueTermCount(); + } catch (UnsupportedOperationException uoe) { + numUniqueTerms = -1; + } + if (numUniqueTerms != -1) { + + if (numUniqueTerms > termCountHardLimit) { + // app is misusing the API (there is more than + // one term per doc); in this case we make best + // effort to load what we can (see LUCENE-2142) + numUniqueTerms = termCountHardLimit; + } + + startBytesBPV = PackedInts.bitsRequired(numUniqueTerms*4); + startTermsBPV = PackedInts.bitsRequired(numUniqueTerms); + + startNumUniqueTerms = (int) numUniqueTerms; + } else { + startBytesBPV = 1; + startTermsBPV = 1; + startNumUniqueTerms = 1; + } + } else { + startBytesBPV = 1; + startTermsBPV = 1; + startNumUniqueTerms = 1; + } + + GrowableWriter termOrdToBytesOffset = new GrowableWriter(startBytesBPV, 1+startNumUniqueTerms, fasterButMoreRAM); + final GrowableWriter docToTermOrd = new GrowableWriter(startTermsBPV, reader.maxDoc(), fasterButMoreRAM); + + // 0 is reserved for "unset" + bytes.copyUsingLengthPrefix(new BytesRef()); + int termOrd = 1; + + if (terms != null) { + final TermsEnum termsEnum = terms.iterator(); + final Bits delDocs = MultiFields.getDeletedDocs(reader); + DocsEnum docs = null; + + while(true) { + final BytesRef term = termsEnum.next(); + if (term == null) { + break; + } + if (termOrd >= termCountHardLimit) { + break; + } + + if (termOrd == termOrdToBytesOffset.size()) { + // NOTE: this code only runs if the incoming + // reader impl doesn't implement + // getUniqueTermCount (which should be uncommon) + termOrdToBytesOffset = termOrdToBytesOffset.resize(ArrayUtil.oversize(1+termOrd, 1)); + } + termOrdToBytesOffset.set(termOrd, bytes.copyUsingLengthPrefix(term)); + docs = termsEnum.docs(delDocs, docs); + while (true) { + final int docID = docs.nextDoc(); + if (docID == DocIdSetIterator.NO_MORE_DOCS) { + break; + } + docToTermOrd.set(docID, termOrd); + } + termOrd++; + } + + if (termOrdToBytesOffset.size() > termOrd) { + termOrdToBytesOffset = termOrdToBytesOffset.resize(termOrd); + } + } + + // maybe an int-only impl? + return (T)new DocTermsIndexImpl(bytes.freeze(true), termOrdToBytesOffset.getMutable(), docToTermOrd.getMutable(), termOrd); + } + + @Override + public T validate(T entry, IndexReader reader) throws IOException { + // TODO? nothing? perhaps subsequent call with FASTER_BUT_MORE_RAM? 
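+    // nothing to re-validate: create() builds the DocTermsIndex in full, so a
+    // repeat request has no optional piece (values vs. bits) to fill in later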
+    return entry;
+  }
+
+  //-----------------------------------------------------------------------------
+  //-----------------------------------------------------------------------------
+
+  public static class DocTermsIndexImpl extends DocTermsIndex {
+    private final PagedBytes.Reader bytes;
+    private final PackedInts.Reader termOrdToBytesOffset;
+    private final PackedInts.Reader docToTermOrd;
+    private final int numOrd;
+
+    public DocTermsIndexImpl(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, PackedInts.Reader docToTermOrd, int numOrd) {
+      this.bytes = bytes;
+      this.docToTermOrd = docToTermOrd;
+      this.termOrdToBytesOffset = termOrdToBytesOffset;
+      this.numOrd = numOrd;
+    }
+
+    @Override
+    public PackedInts.Reader getDocToOrd() {
+      return docToTermOrd;
+    }
+
+    @Override
+    public int numOrd() {
+      return numOrd;
+    }
+
+    @Override
+    public int getOrd(int docID) {
+      return (int) docToTermOrd.get(docID);
+    }
+
+    @Override
+    public int size() {
+      return docToTermOrd.size();
+    }
+
+    @Override
+    public BytesRef lookup(int ord, BytesRef ret) {
+      return bytes.fillUsingLengthPrefix(ret, termOrdToBytesOffset.get(ord));
+    }
+
+    @Override
+    public TermsEnum getTermsEnum() {
+      return this.new DocTermsIndexEnum();
+    }
+
+    class DocTermsIndexEnum extends TermsEnum {
+      int currentOrd;
+      int currentBlockNumber;
+      int end;  // end position in the current block
+      final byte[][] blocks;
+      final int[] blockEnds;
+
+      final BytesRef term = new BytesRef();
+
+      public DocTermsIndexEnum() {
+        currentOrd = 0;
+        currentBlockNumber = 0;
+        blocks = bytes.getBlocks();
+        blockEnds = bytes.getBlockEnds();
+        currentBlockNumber = bytes.fillUsingLengthPrefix2(term, termOrdToBytesOffset.get(0));
+        end = blockEnds[currentBlockNumber];
+      }
+
+      @Override
+      public SeekStatus seek(BytesRef text, boolean useCache) throws IOException {
+        // TODO - we can support with binary search
+        throw new UnsupportedOperationException();
+      }
+
+      @Override
+      public SeekStatus seek(long ord) throws IOException {
+        assert(ord >= 0 && ord <= numOrd);
+        // TODO: if gap is small, could iterate from current position? Or let user decide that?
+        currentBlockNumber = bytes.fillUsingLengthPrefix2(term, termOrdToBytesOffset.get((int)ord));
+        end = blockEnds[currentBlockNumber];
+        currentOrd = (int)ord;
+        return SeekStatus.FOUND;
+      }
+
+      @Override
+      public BytesRef next() throws IOException {
+        int start = term.offset + term.length;
+        if (start >= end) {
+          // switch byte blocks
+          if (currentBlockNumber +1 >= blocks.length) {
+            return null;
+          }
+          currentBlockNumber++;
+          term.bytes = blocks[currentBlockNumber];
+          end = blockEnds[currentBlockNumber];
+          start = 0;
+          if (end<=0) return null; // special case of empty last array
+        }
+
+        currentOrd++;
+
+        byte[] block = term.bytes;
+        if ((block[start] & 128) == 0) {
+          term.length = block[start];
+          term.offset = start+1;
+        } else {
+          term.length = (((block[start] & 0x7f)) << 8) | (block[1+start] & 0xff);
+          term.offset = start+2;
+        }
+
+        return term;
+      }
+
+      @Override
+      public BytesRef term() throws IOException {
+        return term;
+      }
+
+      @Override
+      public long ord() throws IOException {
+        return currentOrd;
+      }
+
+      @Override
+      public int docFreq() {
+        throw new UnsupportedOperationException();
+      }
+
+      @Override
+      public DocsEnum docs(Bits skipDocs, DocsEnum reuse) throws IOException {
+        throw new UnsupportedOperationException();
+      }
+
+      @Override
+      public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException {
+        throw new UnsupportedOperationException();
+      }
+
+      @Override
+      public Comparator<BytesRef> getComparator() throws IOException {
+        throw new UnsupportedOperationException();
+      }
+    }
+  }
+}
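The creator above can also be driven directly, without going through the FieldCache. A minimal usage sketch (not part of the patch; the open IndexReader, the docID, and the "category" field are assumptions) that maps a document to its term ordinal and resolves the ordinal back to a term, remembering that ord 0 is reserved for "unset":

    // assumes: IndexReader reader; int docID; calling code declares throws IOException
    DocTermsIndex index = new DocTermsIndexCreator<DocTermsIndex>("category").create(reader);
    int ord = index.getOrd(docID);        // 0 means this doc had no term in the field
    if (ord != 0) {
      BytesRef term = index.lookup(ord, new BytesRef());
      System.out.println(docID + " -> " + term.utf8ToString());
    }
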
diff --git a/lucene/src/java/org/apache/lucene/search/cache/DoubleValuesCreator.java b/lucene/src/java/org/apache/lucene/search/cache/DoubleValuesCreator.java
new file mode 100644
index 00000000000..7c82dab4958
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/search/cache/DoubleValuesCreator.java
@@ -0,0 +1,150 @@
+package org.apache.lucene.search.cache;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.FieldCache;
+import org.apache.lucene.search.FieldCache.DoubleParser;
+import org.apache.lucene.search.cache.CachedArray.DoubleValues;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.OpenBitSet;
+
+public class DoubleValuesCreator extends CachedArrayCreator<DoubleValues>
+{
+  protected DoubleParser parser;
+
+  public DoubleValuesCreator( String field, DoubleParser parser, int options )
+  {
+    super( field, options );
+    this.parser = parser;
+  }
+
+  public DoubleValuesCreator( String field, DoubleParser parser )
+  {
+    super( field );
+    this.parser = parser;
+  }
+
+  @Override
+  public Class getArrayType() {
+    return Double.class;
+  }
+
+
+  //--------------------------------------------------------------------------------
+  //--------------------------------------------------------------------------------
+
+  @Override
+  public DoubleValues create(IndexReader reader) throws IOException {
+    return validate( new DoubleValues(), reader );
+  }
+
+  @Override
+  public DoubleValues validate(DoubleValues entry, IndexReader reader) throws IOException {
+    boolean ok = false;
+    if( hasOption(OPTION_CACHE_VALUES) ) {
+      ok = true;
+      if( entry.values == null ) {
+        fillDoubleValues(entry, reader, field);
+      }
+    }
+    if( hasOption(OPTION_CACHE_BITS) ) {
+      ok = true;
+      if( entry.valid == null ) {
+        fillValidBits(entry, reader, field);
+      }
+    }
+    if( !ok ) {
+      throw new RuntimeException( "the config must cache values and/or bits" );
+    }
+    return entry;
+  }
+
+  protected void fillDoubleValues( DoubleValues vals, IndexReader reader, String field ) throws IOException
+  {
+    if( parser == null ) {
+      try {
+        parser = FieldCache.DEFAULT_DOUBLE_PARSER;
+        fillDoubleValues( vals, reader, field );
+        return;
+      }
+      catch (NumberFormatException ne) {
+        vals.parserHashCode = null; // wipe the previous one
+        parser = FieldCache.NUMERIC_UTILS_DOUBLE_PARSER;
+        fillDoubleValues( vals, reader, field );
+        return;
+      }
+    }
+    assertSameParserAndResetCounts(vals, parser);
+
+    Terms terms = MultiFields.getTerms(reader, field);
+    int maxDoc = reader.maxDoc();
+    vals.values = null;
+    if (terms != null) {
+      final TermsEnum termsEnum = terms.iterator();
+      final Bits delDocs = MultiFields.getDeletedDocs(reader);
+      OpenBitSet validBits = (hasOption(OPTION_CACHE_BITS)) ?
+        new OpenBitSet( maxDoc ) : null;
+      DocsEnum docs = null;
+      try {
+        while(true) {
+          final BytesRef term = termsEnum.next();
+          if (term == null) {
+            break;
+          }
+          final double termval = parser.parseDouble(term);
+          docs = termsEnum.docs(delDocs, docs);
+          while (true) {
+            final int docID = docs.nextDoc();
+            if (docID == DocIdSetIterator.NO_MORE_DOCS) {
+              break;
+            }
+            if(vals.values == null) {
+              vals.values = new double[maxDoc];
+            }
+            vals.values[docID] = termval;
+            vals.numDocs++;
+            if( validBits != null ) {
+              validBits.set( docID );
+            }
+          }
+          vals.numTerms++;
+        }
+      } catch (FieldCache.StopFillCacheException stop) {}
+
+      if( vals.valid == null ) {
+        vals.valid = checkMatchAllBits( delDocs, validBits, vals.numDocs, maxDoc );
+      }
+    }
+
+    if(vals.values == null) {
+      vals.values = new double[maxDoc];
+    }
+
+    if( vals.valid == null && vals.numDocs < 1 ) {
+      vals.valid = new Bits.MatchNoBits( maxDoc );
+    }
+  }
+}
\ No newline at end of file
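Unlike the plain double[] entries, the creator-based entries can carry a Bits instance marking which documents really had a value, so a caller can tell a stored 0.0 from an absent one. A sketch of the intended usage (not part of the patch; the "price" field and the open reader are assumptions):

    // cache both the native array and the valid bits, then only trust
    // vals.values[doc] where vals.valid says the doc actually had a term
    DoubleValues vals = new DoubleValuesCreator("price", null,
        CachedArrayCreator.OPTION_CACHE_VALUES | CachedArrayCreator.OPTION_CACHE_BITS).create(reader);
    double sum = 0;
    for (int doc = 0; doc < reader.maxDoc(); doc++) {
      if (vals.valid.get(doc)) {
        sum += vals.values[doc];
      }
    }
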
diff --git a/lucene/src/java/org/apache/lucene/search/cache/EntryCreator.java b/lucene/src/java/org/apache/lucene/search/cache/EntryCreator.java
new file mode 100644
index 00000000000..44f5e2270c6
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/search/cache/EntryCreator.java
@@ -0,0 +1,72 @@
+package org.apache.lucene.search.cache;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.Serializable;
+
+import org.apache.lucene.index.IndexReader;
+
+/**
+ * Creates cached values for a given key
+ *
+ * @lucene.experimental
+ */
+public abstract class EntryCreator<T> implements Serializable
+{
+  public abstract T create( IndexReader reader ) throws IOException;
+  public abstract T validate( T entry, IndexReader reader ) throws IOException;
+
+  /**
+   * Indicates if a cached value should be checked before usage.
+   * This is useful if an application wants to support subsequent calls
+   * to the same cached object that may alter the cached object.  If
+   * an application wants to avoid this (synchronized) check, it should
+   * return 'false'.
+   *
+   * @return 'true' if the Cache should call 'validate' before returning a cached object
+   */
+  public boolean shouldValidate() {
+    return true;
+  }
+
+  /**
+   * @return A key to identify valid cache entries for subsequent requests
+   */
+  public abstract EntryKey getCacheKey();
+
+
+  //------------------------------------------------------------------------
+  // The following code is a hack to make things work while the
+  // EntryCreator is stored in the FieldCache.
+  // When the FieldCache is replaced with a simpler map (LUCENE-2665),
+  // this can be removed.
+  //------------------------------------------------------------------------
+
+  @Override
+  public boolean equals(Object obj) {
+    if( obj instanceof EntryCreator ) {
+      return getCacheKey().equals( ((EntryCreator)obj).getCacheKey() );
+    }
+    return false;
+  }
+
+  @Override
+  public int hashCode() {
+    return getCacheKey().hashCode();
+  }
+}
diff --git a/lucene/src/java/org/apache/lucene/search/cache/EntryCreatorWithOptions.java b/lucene/src/java/org/apache/lucene/search/cache/EntryCreatorWithOptions.java
new file mode 100644
index 00000000000..0bc81be2cca
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/search/cache/EntryCreatorWithOptions.java
@@ -0,0 +1,41 @@
+package org.apache.lucene.search.cache;
+
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+public abstract class EntryCreatorWithOptions<T> extends EntryCreator<T>
+{
+  public static final int OPTION_VALIDATE = 1;
+
+  public int flags;
+
+  public EntryCreatorWithOptions( int flag ) {
+    this.flags = flag;
+  }
+
+  @Override
+  public boolean shouldValidate() {
+    return hasOption( OPTION_VALIDATE );
+  }
+
+  public boolean hasOption( int key )
+  {
+    return (flags & key) == key;
+  }
+}
diff --git a/lucene/src/java/org/apache/lucene/search/cache/EntryKey.java b/lucene/src/java/org/apache/lucene/search/cache/EntryKey.java
new file mode 100644
index 00000000000..fb8d67e7ab2
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/search/cache/EntryKey.java
@@ -0,0 +1,9 @@
+package org.apache.lucene.search.cache;
+
+
+/**
+ * A simple marker class -- perhaps it could/should just be an Object
+ */
+public abstract class EntryKey {
+
+}
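Cache identity is delegated entirely to getCacheKey(), so two creators for the same field and value type are interchangeable as map keys even when their option flags differ, and hasOption() is a plain bitmask test. A sketch of the resulting semantics (not part of the patch; it assumes the value creators defined elsewhere in this change):

    EntryCreator<DoubleValues> a = new DoubleValuesCreator("price", null,
        CachedArrayCreator.OPTION_CACHE_VALUES);
    EntryCreator<DoubleValues> b = new DoubleValuesCreator("price", null,
        CachedArrayCreator.OPTION_CACHE_BITS | EntryCreatorWithOptions.OPTION_VALIDATE);
    assert a.equals(b);        // same key: option flags are not part of the cache identity
    assert b.shouldValidate(); // bit test: (flags & OPTION_VALIDATE) == OPTION_VALIDATE
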
diff --git a/lucene/src/java/org/apache/lucene/search/cache/FloatValuesCreator.java b/lucene/src/java/org/apache/lucene/search/cache/FloatValuesCreator.java
new file mode 100644
index 00000000000..fc8c6aeb957
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/search/cache/FloatValuesCreator.java
@@ -0,0 +1,150 @@
+package org.apache.lucene.search.cache;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.FieldCache;
+import org.apache.lucene.search.FieldCache.FloatParser;
+import org.apache.lucene.search.cache.CachedArray.FloatValues;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.OpenBitSet;
+
+public class FloatValuesCreator extends CachedArrayCreator<FloatValues>
+{
+  protected FloatParser parser;
+
+  public FloatValuesCreator( String field, FloatParser parser, int options )
+  {
+    super( field, options );
+    this.parser = parser;
+  }
+
+  public FloatValuesCreator( String field, FloatParser parser )
+  {
+    super( field );
+    this.parser = parser;
+  }
+
+  @Override
+  public Class getArrayType() {
+    return Float.class;
+  }
+
+
+  //--------------------------------------------------------------------------------
+  //--------------------------------------------------------------------------------
+
+  @Override
+  public FloatValues create(IndexReader reader) throws IOException {
+    return validate( new FloatValues(), reader );
+  }
+
+  @Override
+  public FloatValues validate(FloatValues entry, IndexReader reader) throws IOException {
+    boolean ok = false;
+    if( hasOption(OPTION_CACHE_VALUES) ) {
+      ok = true;
+      if( entry.values == null ) {
+        fillFloatValues(entry, reader, field);
+      }
+    }
+    if( hasOption(OPTION_CACHE_BITS) ) {
+      ok = true;
+      if( entry.valid == null ) {
+        fillValidBits(entry, reader, field);
+      }
+    }
+    if( !ok ) {
+      throw new RuntimeException( "the config must cache values and/or bits" );
+    }
+    return entry;
+  }
+
+  protected void fillFloatValues( FloatValues vals, IndexReader reader, String field ) throws IOException
+  {
+    if( parser == null ) {
+      try {
+        parser = FieldCache.DEFAULT_FLOAT_PARSER;
+        fillFloatValues( vals, reader, field );
+        return;
+      }
+      catch (NumberFormatException ne) {
+        vals.parserHashCode = null; // wipe the previous one
+        parser = FieldCache.NUMERIC_UTILS_FLOAT_PARSER;
+        fillFloatValues( vals, reader, field );
+        return;
+      }
+    }
+    assertSameParserAndResetCounts(vals, parser);
+
+    Terms terms = MultiFields.getTerms(reader, field);
+    int maxDoc = reader.maxDoc();
+    vals.values = null;
+    if (terms != null) {
+      final TermsEnum termsEnum = terms.iterator();
+      final Bits delDocs = MultiFields.getDeletedDocs(reader);
+      OpenBitSet validBits = (hasOption(OPTION_CACHE_BITS)) ?
+        new OpenBitSet( maxDoc ) : null;
+      DocsEnum docs = null;
+      try {
+        while(true) {
+          final BytesRef term = termsEnum.next();
+          if (term == null) {
+            break;
+          }
+          final float termval = parser.parseFloat(term);
+          docs = termsEnum.docs(delDocs, docs);
+          while (true) {
+            final int docID = docs.nextDoc();
+            if (docID == DocIdSetIterator.NO_MORE_DOCS) {
+              break;
+            }
+            if(vals.values == null) {
+              vals.values = new float[maxDoc];
+            }
+            vals.values[docID] = termval;
+            vals.numDocs++;
+            if( validBits != null ) {
+              validBits.set( docID );
+            }
+          }
+          vals.numTerms++;
+        }
+      } catch (FieldCache.StopFillCacheException stop) {}
+
+      if( vals.valid == null ) {
+        vals.valid = checkMatchAllBits( delDocs, validBits, vals.numDocs, maxDoc );
+      }
+    }
+
+    if(vals.values == null) {
+      vals.values = new float[maxDoc];
+    }
+
+    if( vals.valid == null && vals.numDocs < 1 ) {
+      vals.valid = new Bits.MatchNoBits( maxDoc );
+    }
+  }
+}
\ No newline at end of file
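When no parser is supplied, fillFloatValues() first tries FieldCache.DEFAULT_FLOAT_PARSER and falls back to the NUMERIC_UTILS variant on NumberFormatException. A parser may also end the fill early by throwing FieldCache.StopFillCacheException, which the loop above deliberately swallows; that is the mechanism the NUMERIC_UTILS_* parsers use to stop once they run past the full-precision terms. For a numerically indexed field the parser can simply be passed explicitly; a sketch (the "score" field and open reader are assumptions):

    // fields indexed via NumericField need the NumericUtils term encoding
    FloatValues scores = new FloatValuesCreator("score",
        FieldCache.NUMERIC_UTILS_FLOAT_PARSER).create(reader);
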
diff --git a/lucene/src/java/org/apache/lucene/search/cache/IntValuesCreator.java b/lucene/src/java/org/apache/lucene/search/cache/IntValuesCreator.java
new file mode 100644
index 00000000000..891cff48232
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/search/cache/IntValuesCreator.java
@@ -0,0 +1,150 @@
+package org.apache.lucene.search.cache;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.FieldCache;
+import org.apache.lucene.search.FieldCache.IntParser;
+import org.apache.lucene.search.cache.CachedArray.IntValues;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.OpenBitSet;
+
+public class IntValuesCreator extends CachedArrayCreator<IntValues>
+{
+  protected IntParser parser;
+
+  public IntValuesCreator( String field, IntParser parser, int options )
+  {
+    super( field, options );
+    this.parser = parser;
+  }
+
+  public IntValuesCreator( String field, IntParser parser )
+  {
+    super( field );
+    this.parser = parser;
+  }
+
+  @Override
+  public Class getArrayType() {
+    return Integer.class;
+  }
+
+
+  //--------------------------------------------------------------------------------
+  //--------------------------------------------------------------------------------
+
+  @Override
+  public IntValues create(IndexReader reader) throws IOException {
+    return validate( new IntValues(), reader );
+  }
+
+  @Override
+  public IntValues validate(IntValues entry, IndexReader reader) throws IOException {
+    boolean ok = false;
+    if( hasOption(OPTION_CACHE_VALUES) ) {
+      ok = true;
+      if( entry.values == null ) {
+        fillIntValues(entry, reader, field);
+      }
+    }
+    if( hasOption(OPTION_CACHE_BITS) ) {
+      ok = true;
+      if( entry.valid == null ) {
+        fillValidBits(entry, reader, field);
+      }
+    }
+    if( !ok ) {
+      throw new RuntimeException( "the config must cache values and/or bits" );
+    }
+    return entry;
+  }
+
+  protected void fillIntValues( IntValues vals, IndexReader reader, String field ) throws IOException
+  {
+    if( parser == null ) {
+      try {
+        parser = FieldCache.DEFAULT_INT_PARSER;
+        fillIntValues( vals, reader, field );
+        return;
+      }
+      catch (NumberFormatException ne) {
+        vals.parserHashCode = null; // wipe the previous one
+        parser = FieldCache.NUMERIC_UTILS_INT_PARSER;
+        fillIntValues( vals, reader, field );
+        return;
+      }
+    }
+    assertSameParserAndResetCounts(vals, parser);
+
+    Terms terms = MultiFields.getTerms(reader, field);
+    int maxDoc = reader.maxDoc();
+    vals.values = null;
+    if (terms != null) {
+      final TermsEnum termsEnum = terms.iterator();
+      final Bits delDocs = MultiFields.getDeletedDocs(reader);
+      OpenBitSet validBits = (hasOption(OPTION_CACHE_BITS)) ?
+        new OpenBitSet( maxDoc ) : null;
+      DocsEnum docs = null;
+      try {
+        while(true) {
+          final BytesRef term = termsEnum.next();
+          if (term == null) {
+            break;
+          }
+          final int termval = parser.parseInt(term);
+          docs = termsEnum.docs(delDocs, docs);
+          while (true) {
+            final int docID = docs.nextDoc();
+            if (docID == DocIdSetIterator.NO_MORE_DOCS) {
+              break;
+            }
+            if(vals.values == null) {
+              vals.values = new int[maxDoc];
+            }
+            vals.values[docID] = termval;
+            vals.numDocs++;
+            if( validBits != null ) {
+              validBits.set( docID );
+            }
+          }
+          vals.numTerms++;
+        }
+      } catch (FieldCache.StopFillCacheException stop) {}
+
+      if( vals.valid == null ) {
+        vals.valid = checkMatchAllBits( delDocs, validBits, vals.numDocs, maxDoc );
+      }
+    }
+
+    if(vals.values == null) {
+      vals.values = new int[maxDoc];
+    }
+
+    if( vals.valid == null && vals.numDocs < 1 ) {
+      vals.valid = new Bits.MatchNoBits( maxDoc );
+    }
+  }
+}
\ No newline at end of file
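In normal use these entries are obtained through the creator-aware FieldCache methods added by this patch rather than by calling create() directly, which lets the cache hand back an existing entry for an equal key. A sketch (the "count" field and open reader are assumptions):

    IntValues ints = FieldCache.DEFAULT.getInts(reader, "count",
        new IntValuesCreator("count", FieldCache.DEFAULT_INT_PARSER));
    // without OPTION_CACHE_BITS, a stored 0 and "no value" are indistinguishable
    int first = ints.values[0];
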
diff --git a/lucene/src/java/org/apache/lucene/search/cache/LongValuesCreator.java b/lucene/src/java/org/apache/lucene/search/cache/LongValuesCreator.java
new file mode 100644
index 00000000000..d9ec5c901e7
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/search/cache/LongValuesCreator.java
@@ -0,0 +1,150 @@
+package org.apache.lucene.search.cache;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.FieldCache;
+import org.apache.lucene.search.FieldCache.LongParser;
+import org.apache.lucene.search.cache.CachedArray.LongValues;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.OpenBitSet;
+
+public class LongValuesCreator extends CachedArrayCreator<LongValues>
+{
+  protected LongParser parser;
+
+  public LongValuesCreator( String field, LongParser parser, int options )
+  {
+    super( field, options );
+    this.parser = parser;
+  }
+
+  public LongValuesCreator( String field, LongParser parser )
+  {
+    super( field );
+    this.parser = parser;
+  }
+
+  @Override
+  public Class getArrayType() {
+    return Long.class;
+  }
+
+
+  //--------------------------------------------------------------------------------
+  //--------------------------------------------------------------------------------
+
+  @Override
+  public LongValues create(IndexReader reader) throws IOException {
+    return validate( new LongValues(), reader );
+  }
+
+  @Override
+  public LongValues validate(LongValues entry, IndexReader reader) throws IOException {
+    boolean ok = false;
+    if( hasOption(OPTION_CACHE_VALUES) ) {
+      ok = true;
+      if( entry.values == null ) {
+        fillLongValues(entry, reader, field);
+      }
+    }
+    if( hasOption(OPTION_CACHE_BITS) ) {
+      ok = true;
+      if( entry.valid == null ) {
+        fillValidBits(entry, reader, field);
+      }
+    }
+    if( !ok ) {
+      throw new RuntimeException( "the config must cache values and/or bits" );
+    }
+    return entry;
+  }
+
+  protected void fillLongValues( LongValues vals, IndexReader reader, String field ) throws IOException
+  {
+    if( parser == null ) {
+      try {
+        parser = FieldCache.DEFAULT_LONG_PARSER;
+        fillLongValues( vals, reader, field );
+        return;
+      }
+      catch (NumberFormatException ne) {
+        vals.parserHashCode = null; // wipe the previous one
+        parser = FieldCache.NUMERIC_UTILS_LONG_PARSER;
+        fillLongValues( vals, reader, field );
+        return;
+      }
+    }
+    assertSameParserAndResetCounts(vals, parser);
+
+    Terms terms = MultiFields.getTerms(reader, field);
+    int maxDoc = reader.maxDoc();
+    vals.values = null;
+    if (terms != null) {
+      final TermsEnum termsEnum = terms.iterator();
+      final Bits delDocs = MultiFields.getDeletedDocs(reader);
+      OpenBitSet validBits = (hasOption(OPTION_CACHE_BITS)) ?
+        new OpenBitSet( maxDoc ) : null;
+      DocsEnum docs = null;
+      try {
+        while(true) {
+          final BytesRef term = termsEnum.next();
+          if (term == null) {
+            break;
+          }
+          final long termval = parser.parseLong(term);
+          docs = termsEnum.docs(delDocs, docs);
+          while (true) {
+            final int docID = docs.nextDoc();
+            if (docID == DocIdSetIterator.NO_MORE_DOCS) {
+              break;
+            }
+            if(vals.values == null) {
+              vals.values = new long[maxDoc];
+            }
+            vals.values[docID] = termval;
+            vals.numDocs++;
+            if( validBits != null ) {
+              validBits.set( docID );
+            }
+          }
+          vals.numTerms++;
+        }
+      } catch (FieldCache.StopFillCacheException stop) {}
+
+      if( vals.valid == null ) {
+        vals.valid = checkMatchAllBits( delDocs, validBits, vals.numDocs, maxDoc );
+      }
+    }
+
+    if(vals.values == null) {
+      vals.values = new long[maxDoc];
+    }
+
+    if( vals.valid == null && vals.numDocs < 1 ) {
+      vals.valid = new Bits.MatchNoBits( maxDoc );
+    }
+  }
+}
\ No newline at end of file
diff --git a/lucene/src/java/org/apache/lucene/search/cache/ShortValuesCreator.java b/lucene/src/java/org/apache/lucene/search/cache/ShortValuesCreator.java
new file mode 100644
index 00000000000..5f41ba4a6d7
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/search/cache/ShortValuesCreator.java
@@ -0,0 +1,132 @@
+package org.apache.lucene.search.cache;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.FieldCache;
+import org.apache.lucene.search.FieldCache.ShortParser;
+import org.apache.lucene.search.cache.CachedArray.ShortValues;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.OpenBitSet;
+
+public class ShortValuesCreator extends CachedArrayCreator<ShortValues>
+{
+  protected ShortParser parser;
+
+  public ShortValuesCreator( String field, ShortParser parser, int options )
+  {
+    super( field, options );
+    this.parser = parser;
+  }
+
+  public ShortValuesCreator( String field, ShortParser parser )
+  {
+    super( field );
+    this.parser = parser;
+  }
+
+  @Override
+  public Class getArrayType() {
+    return Short.class;
+  }
+
+
+  //--------------------------------------------------------------------------------
+  //--------------------------------------------------------------------------------
+
+  @Override
+  public ShortValues create(IndexReader reader) throws IOException {
+    return validate( new ShortValues(), reader );
+  }
+
+  @Override
+  public ShortValues validate(ShortValues entry, IndexReader reader) throws IOException {
+    boolean ok = false;
+    if( hasOption(OPTION_CACHE_VALUES) ) {
+      ok = true;
+      if( entry.values == null ) {
+        fillShortValues(entry, reader, field);
+      }
+    }
+    if( hasOption(OPTION_CACHE_BITS) ) {
+      ok = true;
+      if( entry.valid == null ) {
+        fillValidBits(entry, reader, field);
+      }
+    }
+    if( !ok ) {
+      throw new RuntimeException( "the config must cache values and/or bits" );
+    }
+    return entry;
+  }
+
+  protected void fillShortValues( ShortValues vals, IndexReader reader, String field ) throws IOException
+  {
+    if( parser == null ) {
+      parser = FieldCache.DEFAULT_SHORT_PARSER;
+    }
+    assertSameParserAndResetCounts(vals, parser);
+
+    Terms terms = MultiFields.getTerms(reader, field);
+    int maxDoc = reader.maxDoc();
+    vals.values = new short[maxDoc];
+    if (terms != null) {
+      final TermsEnum termsEnum = terms.iterator();
+      final Bits delDocs = MultiFields.getDeletedDocs(reader);
+      OpenBitSet validBits = (hasOption(OPTION_CACHE_BITS)) ?
+        new OpenBitSet( maxDoc ) : null;
+      DocsEnum docs = null;
+      try {
+        while(true) {
+          final BytesRef term = termsEnum.next();
+          if (term == null) {
+            break;
+          }
+          final short termval = parser.parseShort(term);
+          docs = termsEnum.docs(delDocs, docs);
+          while (true) {
+            final int docID = docs.nextDoc();
+            if (docID == DocIdSetIterator.NO_MORE_DOCS) {
+              break;
+            }
+            vals.values[docID] = termval;
+            vals.numDocs++;
+            if( validBits != null ) {
+              validBits.set( docID );
+            }
+          }
+          vals.numTerms++;
+        }
+      } catch (FieldCache.StopFillCacheException stop) {}
+
+      if( vals.valid == null ) {
+        vals.valid = checkMatchAllBits( delDocs, validBits, vals.numDocs, maxDoc );
+      }
+    }
+    if( vals.valid == null && vals.numDocs < 1 ) {
+      vals.valid = new Bits.MatchNoBits( maxDoc );
+    }
+  }
+}
\ No newline at end of file
diff --git a/lucene/src/java/org/apache/lucene/search/cache/SimpleEntryKey.java b/lucene/src/java/org/apache/lucene/search/cache/SimpleEntryKey.java
new file mode 100644
index 00000000000..58b4883ce5b
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/search/cache/SimpleEntryKey.java
@@ -0,0 +1,59 @@
+package org.apache.lucene.search.cache;
+
+public class SimpleEntryKey extends EntryKey
+{
+  public final Class clazz;
+  public final Object[] args;
+  public final int hash;
+
+  public SimpleEntryKey( Class clazz, Object ... args ) {
+    this.clazz = clazz;
+    this.args = args;
+
+    int hash = clazz.hashCode();
+    if( args != null ) {
+      for( Object obj : args ) {
+        hash ^= obj.hashCode();
+      }
+    }
+    this.hash = hash;
+  }
+
+  @Override
+  public boolean equals(Object obj) {
+    if( obj instanceof SimpleEntryKey ) {
+      SimpleEntryKey key = (SimpleEntryKey)obj;
+      if( key.hash != hash ||
+          key.clazz != clazz ||
+          key.args.length != args.length ) {
+        return false;
+      }
+
+      // In the off chance that the hash etc is all the same
+      // we should actually check the values
+      for( int i=0; i<args.length; i++ ) {
+        if( !args[i].equals( key.args[i] ) ) {
+          return false;
+        }
+      }
+      return true;
+    }
+    return false;
+  }
+
+  @Override
+  public int hashCode() {
+    return hash;
+  }
+}
diff --git a/lucene/src/test/org/apache/lucene/search/cache/TestEntryCreators.java b/lucene/src/test/org/apache/lucene/search/cache/TestEntryCreators.java
new file mode 100644
--- /dev/null
+++ b/lucene/src/test/org/apache/lucene/search/cache/TestEntryCreators.java
+package org.apache.lucene.search.cache;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.Method;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.search.FieldCache;
+import org.apache.lucene.search.FieldCache.*;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.LuceneTestCase;
+
+import static org.hamcrest.CoreMatchers.*;
+
+public class TestEntryCreators extends LuceneTestCase {
+  protected IndexReader reader;
+  private static int NUM_DOCS = 500 * RANDOM_MULTIPLIER;
+  private Directory directory;
+
+  static class NumberTypeTester {
+    String funcName;
+    Class<? extends CachedArrayCreator> creator;
+    Class<? extends Parser> parser;
+    String field;
+    Number[] values;
+
+    public NumberTypeTester( String f, String func, Class<? extends CachedArrayCreator> creator, Class<? extends Parser> parser ) {
+      field = f;
+      funcName = func;
+      this.creator = creator;
+      this.parser = parser;
+      values = new Number[NUM_DOCS];
+    }
+    public String toString()
+    {
+      return field;
+    }
+  }
+  private NumberTypeTester[] typeTests;
+
+
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+    directory = newDirectory();
+    RandomIndexWriter writer = new RandomIndexWriter(random, directory);
+
+    typeTests = new NumberTypeTester[] {
+      new NumberTypeTester( "theRandomByte",   "getBytes",   ByteValuesCreator.class,   ByteParser.class ),
+      new NumberTypeTester( "theRandomShort",  "getShorts",  ShortValuesCreator.class,  ShortParser.class ),
+      new NumberTypeTester( "theRandomInt",    "getInts",    IntValuesCreator.class,    IntParser.class ),
+      new NumberTypeTester( "theRandomLong",   "getLongs",   LongValuesCreator.class,   LongParser.class ),
+      new NumberTypeTester( "theRandomFloat",  "getFloats",  FloatValuesCreator.class,  FloatParser.class ),
+      new NumberTypeTester( "theRandomDouble", "getDoubles", DoubleValuesCreator.class, DoubleParser.class ),
+    };
+
+    for (int i = 0; i < NUM_DOCS; i++){
+      Document doc = new Document();
+
+      // Test the valid bits
+      for( NumberTypeTester tester : typeTests ) {
+        if (random.nextInt(20) != 17 && i > 1) {
+          tester.values[i] = 10 + random.nextInt( 20 ); // get some field overlap
+          doc.add(newField(tester.field, String.valueOf(tester.values[i]),
+              Field.Store.NO, Field.Index.NOT_ANALYZED ));
+        }
+      }
+      writer.addDocument(doc);
+    }
+
+    reader = writer.getReader();
+    writer.close();
+  }
+
+  @Override
+  public void tearDown() throws Exception {
+    reader.close();
+    directory.close();
+    super.tearDown();
+  }
+
+  public void testKeys() throws IOException {
+    // Check that the keys are unique for different fields
+
+    EntryKey key_1 = new ByteValuesCreator( "field1", null ).getCacheKey();
+    EntryKey key_2 = new ByteValuesCreator( "field2", null ).getCacheKey();
+    assertThat("different fields should have a different key", key_1, not(key_2) );
+
+    key_1 = new ByteValuesCreator( "field1", null ).getCacheKey();
+    key_2 = new ShortValuesCreator( "field1", null ).getCacheKey();
+    assertThat( "same field with a different type should have a different key", key_1, not( key_2 ) );
+
+    key_1 = new ByteValuesCreator( "ff", null ).getCacheKey();
+    key_2 = new ByteValuesCreator( "ff", null ).getCacheKey();
+    assertThat( "same args should have same key", key_1, is( key_2 ) );
+
+    key_1 = new ByteValuesCreator( "ff", null, ByteValuesCreator.OPTION_CACHE_BITS ^ ByteValuesCreator.OPTION_CACHE_VALUES ).getCacheKey();
+    key_2 = new ByteValuesCreator( "ff", null ).getCacheKey();
+    assertThat( "different options should share same key", key_1, is( key_2 ) );
+
+    key_1 = new IntValuesCreator( "ff", FieldCache.DEFAULT_INT_PARSER ).getCacheKey();
+    key_2 = new IntValuesCreator( "ff", FieldCache.NUMERIC_UTILS_INT_PARSER ).getCacheKey();
+    assertThat( "different parsers should have same key", key_1, is( key_2 ) );
+  }
+
+  private CachedArray getWithReflection( FieldCache cache, NumberTypeTester tester, int flags ) throws IOException
+  {
+    try {
+      Method getXXX = cache.getClass().getMethod( tester.funcName, IndexReader.class, String.class, EntryCreator.class );
+      Constructor constructor = tester.creator.getConstructor( String.class, tester.parser, Integer.TYPE );
+      CachedArrayCreator creator = (CachedArrayCreator)constructor.newInstance( tester.field, null, flags );
+      return (CachedArray) getXXX.invoke(cache, reader, tester.field, creator );
+    }
+    catch( Exception ex ) {
+      throw new RuntimeException( "Reflection failed", ex );
+    }
+  }
+
+  public void testCachedArrays() throws IOException
+  {
+    FieldCache cache = FieldCache.DEFAULT;
+
+    // Check the different CachedArray types
+    CachedArray last = null;
+    CachedArray justbits = null;
+
+    for( NumberTypeTester tester : typeTests ) {
+      justbits = getWithReflection( cache, tester, CachedArrayCreator.OPTION_CACHE_BITS );
+      assertNull( "should not get values : "+tester, justbits.getRawArray() );
+      assertNotNull( "should get bits : "+tester, justbits.valid );
+      last = getWithReflection( cache, tester, CachedArrayCreator.CACHE_VALUES_AND_BITS );
+      assertEquals( "should use same cached object : "+tester, justbits, last );
+      assertNull( "Validate=false should not regenerate : "+tester, justbits.getRawArray() );
+      last = getWithReflection( cache, tester, CachedArrayCreator.CACHE_VALUES_AND_BITS_VALIDATE );
+      assertEquals( "should use same cached object : "+tester, justbits, last );
+      assertNotNull( "Validate=true should add the Array : "+tester, justbits.getRawArray() );
+      checkCachedArrayValuesAndBits( tester, last );
+    }
+  }
+
+  private void checkCachedArrayValuesAndBits( NumberTypeTester tester, CachedArray cachedVals )
+  {
+//    for( int i=0; i<NUM_DOCS; i++ ) {
+//    }
+    Set<Number> distinctTerms = new HashSet<Number>();
+    for( int i=0; i