diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 5a35d44b897..bea1749c0d2 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -161,6 +161,10 @@ Optimizations posting lists. All index data is represented as consecutive byte/int arrays to reduce GC cost and memory overhead. (Simon Willnauer) +* LUCENE-4538: DocValues now caches direct sources in a ThreadLocal exposed via SourceCache. + Users of this API can now simply obtain an instance via DocValues#getDirectSource per thread. + (Simon Willnauer) + Build * Upgrade randomized testing to version 2.0.4: avoid hangs on shutdown diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPerDocProducer.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPerDocProducer.java index f831efd64ec..df688180da1 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPerDocProducer.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPerDocProducer.java @@ -136,7 +136,7 @@ public class SimpleTextPerDocProducer extends PerDocProducerBase { } @Override - public Source load() throws IOException { + protected Source loadSource() throws IOException { boolean success = false; IndexInput in = input.clone(); try { @@ -198,9 +198,14 @@ public class SimpleTextPerDocProducer extends PerDocProducerBase { assert scratch.equals(END); return reader.getSource(); } - + @Override public Source getDirectSource() throws IOException { + return this.getSource(); // don't cache twice + } + + @Override + protected Source loadDirectSource() throws IOException { return this.getSource(); } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Bytes.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Bytes.java index 5613d8e97a2..2abe64f752e 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Bytes.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Bytes.java @@ -308,7 +308,7 @@ public final class Bytes { /** * Opens all necessary files, but does not read any data in until you call - * {@link #load}. + * {@link #loadSource}. */ static abstract class BytesReaderBase extends DocValues { protected final IndexInput idxIn; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedDerefBytesImpl.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedDerefBytesImpl.java index 5f733188ad8..6d34d29136c 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedDerefBytesImpl.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedDerefBytesImpl.java @@ -79,12 +79,12 @@ class FixedDerefBytesImpl { } @Override - public Source load() throws IOException { + protected Source loadSource() throws IOException { return new FixedDerefSource(cloneData(), cloneIndex(), size, numValuesStored); } @Override - public Source getDirectSource() + protected Source loadDirectSource() throws IOException { return new DirectFixedDerefSource(cloneData(), cloneIndex(), size, getType()); } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedSortedBytesImpl.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedSortedBytesImpl.java index 3e18fbc1c56..a65b7c044a6 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedSortedBytesImpl.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedSortedBytesImpl.java @@ -135,13 +135,13 @@ class FixedSortedBytesImpl { } @Override - public Source load() throws IOException { + protected Source loadSource() throws IOException { return new FixedSortedSource(cloneData(), cloneIndex(), size, valueCount, comparator); } @Override - public Source getDirectSource() throws IOException { + protected Source loadDirectSource() throws IOException { return new DirectFixedSortedSource(cloneData(), cloneIndex(), size, valueCount, comparator, type); } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedStraightBytesImpl.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedStraightBytesImpl.java index 8b630e9aa03..a74cb90107c 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedStraightBytesImpl.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedStraightBytesImpl.java @@ -280,7 +280,7 @@ class FixedStraightBytesImpl { } @Override - public Source load() throws IOException { + protected Source loadSource() throws IOException { return size == 1 ? new SingleByteSource(cloneData(), maxDoc) : new FixedStraightSource(cloneData(), size, maxDoc, type); } @@ -291,7 +291,7 @@ class FixedStraightBytesImpl { } @Override - public Source getDirectSource() throws IOException { + protected Source loadDirectSource() throws IOException { return new DirectFixedStraightSource(cloneData(), size, getType()); } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Floats.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Floats.java index c82033ddbf2..8eec5d6d2dd 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Floats.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Floats.java @@ -125,7 +125,7 @@ public class Floats { } @Override - public Source load() throws IOException { + protected Source loadSource() throws IOException { final IndexInput indexInput = cloneData(); try { return arrayTemplate.newFromInput(indexInput, maxDoc); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Ints.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Ints.java index 97df84b2f8a..56db89f574f 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Ints.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Ints.java @@ -149,7 +149,7 @@ public final class Ints { } @Override - public Source load() throws IOException { + protected Source loadSource() throws IOException { final IndexInput indexInput = cloneData(); try { return arrayTemplate.newFromInput(indexInput, maxDoc); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/PackedIntValues.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/PackedIntValues.java index 4f2765342d0..01b3652c3c8 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/PackedIntValues.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/PackedIntValues.java @@ -149,7 +149,7 @@ class PackedIntValues { /** * Opens all necessary files, but does not read any data in until you call - * {@link #load}. + * {@link #loadSource}. */ static class PackedIntsReader extends DocValues { private final IndexInput datIn; @@ -182,7 +182,7 @@ class PackedIntValues { * already previously loaded but then discarded the Source. */ @Override - public Source load() throws IOException { + protected Source loadSource() throws IOException { boolean success = false; final Source source; IndexInput input = null; @@ -217,7 +217,7 @@ class PackedIntValues { @Override - public Source getDirectSource() throws IOException { + protected Source loadDirectSource() throws IOException { return values != null ? new FixedStraightBytesImpl.DirectFixedStraightSource(datIn.clone(), 8, Type.FIXED_INTS_64) : new PackedIntsSource(datIn.clone(), true); } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarDerefBytesImpl.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarDerefBytesImpl.java index c766a3640c1..c89939668a3 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarDerefBytesImpl.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarDerefBytesImpl.java @@ -99,12 +99,12 @@ class VarDerefBytesImpl { } @Override - public Source load() throws IOException { + protected Source loadSource() throws IOException { return new VarDerefSource(cloneData(), cloneIndex(), totalBytes); } @Override - public Source getDirectSource() + protected Source loadDirectSource() throws IOException { return new DirectVarDerefSource(cloneData(), cloneIndex(), getType()); } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarSortedBytesImpl.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarSortedBytesImpl.java index 01d336d64af..6cc4a8dae7c 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarSortedBytesImpl.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarSortedBytesImpl.java @@ -161,13 +161,13 @@ final class VarSortedBytesImpl { } @Override - public org.apache.lucene.index.DocValues.Source load() + public org.apache.lucene.index.DocValues.Source loadSource() throws IOException { return new VarSortedSource(cloneData(), cloneIndex(), comparator); } @Override - public Source getDirectSource() throws IOException { + protected Source loadDirectSource() throws IOException { return new DirectSortedSource(cloneData(), cloneIndex(), comparator, getType()); } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarStraightBytesImpl.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarStraightBytesImpl.java index 9a23cbe5f0e..97dab57ba42 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarStraightBytesImpl.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarStraightBytesImpl.java @@ -247,12 +247,12 @@ class VarStraightBytesImpl { } @Override - public Source load() throws IOException { + protected Source loadSource() throws IOException { return new VarStraightSource(cloneData(), cloneIndex()); } @Override - public Source getDirectSource() + protected Source loadDirectSource() throws IOException { return new DirectVarStraightSource(cloneData(), cloneIndex(), getType()); } diff --git a/lucene/core/src/java/org/apache/lucene/index/DocValues.java b/lucene/core/src/java/org/apache/lucene/index/DocValues.java index 5d5d1124ca1..7e73ec0c94e 100644 --- a/lucene/core/src/java/org/apache/lucene/index/DocValues.java +++ b/lucene/core/src/java/org/apache/lucene/index/DocValues.java @@ -33,8 +33,8 @@ import org.apache.lucene.document.PackedLongDocValuesField; // javadocs import org.apache.lucene.document.ShortDocValuesField; // javadocs import org.apache.lucene.document.SortedBytesDocValuesField; // javadocs import org.apache.lucene.document.StraightBytesDocValuesField; // javadocs -import org.apache.lucene.store.DataOutput; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.CloseableThreadLocal; import org.apache.lucene.util.packed.PackedInts; /** @@ -95,7 +95,6 @@ public abstract class DocValues implements Closeable { private volatile SourceCache cache = new SourceCache.DirectSourceCache(); private final Object cacheLock = new Object(); - /** Sole constructor. (For invocation by subclass * constructors, typically implicit.) */ protected DocValues() { @@ -112,12 +111,12 @@ public abstract class DocValues implements Closeable { * @see #getSource() * @see #setCache(SourceCache) */ - public abstract Source load() throws IOException; + protected abstract Source loadSource() throws IOException; /** * Returns a {@link Source} instance through the current {@link SourceCache}. * Iff no {@link Source} has been loaded into the cache so far the source will - * be loaded through {@link #load()} and passed to the {@link SourceCache}. + * be loaded through {@link #loadSource()} and passed to the {@link SourceCache}. * The caller of this method should not close the obtained {@link Source} * instance unless it is not needed for the rest of its life time. *

@@ -129,12 +128,30 @@ public abstract class DocValues implements Closeable { public Source getSource() throws IOException { return cache.load(this); } + + /** + * Returns a disk resident {@link Source} instance through the current + * {@link SourceCache}. Direct Sources are cached per thread in the + * {@link SourceCache}. The obtained instance should not be shared with other + * threads. + */ + public Source getDirectSource() throws IOException { + return this.cache.loadDirect(this); + } + /** - * Returns a disk resident {@link Source} instance. Direct Sources are not - * cached in the {@link SourceCache} and should not be shared between threads. + * Loads a new {@link Source direct source} instance from this {@link DocValues} field + * instance. Source instances returned from this method are not cached. It is + * the callers responsibility to maintain the instance and release its + * resources once the source is not needed anymore. + *

+ * For managed {@link Source direct source} instances see {@link #getDirectSource()}. + * + * @see #getDirectSource() + * @see #setCache(SourceCache) */ - public abstract Source getDirectSource() throws IOException; + protected abstract Source loadDirectSource() throws IOException; /** * Returns the {@link Type} of this {@link DocValues} instance @@ -163,10 +180,10 @@ public abstract class DocValues implements Closeable { /** * Sets the {@link SourceCache} used by this {@link DocValues} instance. This - * method should be called before {@link #load()} is called. All {@link Source} instances in the currently used cache will be closed + * method should be called before {@link #loadSource()} is called. All {@link Source} instances in the currently used cache will be closed * before the new cache is installed. *

- * Note: All instances previously obtained from {@link #load()} will be lost. + * Note: All instances previously obtained from {@link #loadSource()} will be lost. * * @throws IllegalArgumentException * if the given cache is null @@ -181,6 +198,14 @@ public abstract class DocValues implements Closeable { toClose.close(this); } } + /** + * Returns the currently used cache instance; + * @see #setCache(SourceCache) + */ + // for tests + SourceCache getCache() { + return cache; + } /** * Source of per document values like long, double or {@link BytesRef} @@ -687,9 +712,9 @@ public abstract class DocValues implements Closeable { /** * Abstract base class for {@link DocValues} {@link Source} cache. *

- * {@link Source} instances loaded via {@link DocValues#load()} are entirely memory resident + * {@link Source} instances loaded via {@link DocValues#loadSource()} are entirely memory resident * and need to be maintained by the caller. Each call to - * {@link DocValues#load()} will cause an entire reload of + * {@link DocValues#loadSource()} will cause an entire reload of * the underlying data. Source instances obtained from * {@link DocValues#getSource()} and {@link DocValues#getSource()} * respectively are maintained by a {@link SourceCache} that is closed ( @@ -721,6 +746,15 @@ public abstract class DocValues implements Closeable { * This method will not return null */ public abstract Source load(DocValues values) throws IOException; + + /** + * Atomically loads a {@link Source direct source} into the per-thread cache from the given + * {@link DocValues} and returns it iff no other {@link Source direct source} has already + * been cached. Otherwise the cached source is returned. + *

+ * This method will not return null + */ + public abstract Source loadDirect(DocValues values) throws IOException; /** * Atomically invalidates the cached {@link Source} @@ -744,20 +778,34 @@ public abstract class DocValues implements Closeable { */ public static final class DirectSourceCache extends SourceCache { private Source ref; - + private final CloseableThreadLocal directSourceCache = new CloseableThreadLocal(); + /** Sole constructor. */ public DirectSourceCache() { } public synchronized Source load(DocValues values) throws IOException { if (ref == null) { - ref = values.load(); + ref = values.loadSource(); } return ref; } public synchronized void invalidate(DocValues values) { ref = null; + directSourceCache.close(); + } + + @Override + public synchronized Source loadDirect(DocValues values) throws IOException { + final Source source = directSourceCache.get(); + if (source == null) { + final Source loadDirectSource = values.loadDirectSource(); + directSourceCache.set(loadDirectSource); + return loadDirectSource; + } else { + return source; + } } } } diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java b/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java index a079cd07d0c..69252d3bfc4 100644 --- a/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java +++ b/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java @@ -185,7 +185,7 @@ class MultiDocValues extends DocValues { } @Override - public Source load() throws IOException { + protected Source loadSource() throws IOException { return new MultiSource(slices, starts, false, type); } @@ -199,7 +199,7 @@ class MultiDocValues extends DocValues { } @Override - public Source load() throws IOException { + protected Source loadSource() throws IOException { return emptySource; } @@ -209,7 +209,7 @@ class MultiDocValues extends DocValues { } @Override - public Source getDirectSource() throws IOException { + protected Source loadDirectSource() throws IOException { return emptySource; } } @@ -226,7 +226,7 @@ class MultiDocValues extends DocValues { } @Override - public Source load() throws IOException { + protected Source loadSource() throws IOException { return emptyFixedSource; } @@ -241,7 +241,7 @@ class MultiDocValues extends DocValues { } @Override - public Source getDirectSource() throws IOException { + protected Source loadDirectSource() throws IOException { return emptyFixedSource; } } @@ -594,7 +594,7 @@ class MultiDocValues extends DocValues { } @Override - public Source getDirectSource() throws IOException { + protected Source loadDirectSource() throws IOException { return new MultiSource(slices, starts, true, type); } diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene40/values/TestDocValues.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene40/values/TestDocValues.java index 4aa02a8e46c..865599d6570 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/lucene40/values/TestDocValues.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene40/values/TestDocValues.java @@ -425,8 +425,6 @@ public class TestDocValues extends LuceneTestCase { private Source getSource(DocValues values) throws IOException { // getSource uses cache internally switch(random().nextInt(5)) { - case 3: - return values.load(); case 2: return values.getDirectSource(); case 1: diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDocValuesIndexing.java b/lucene/core/src/test/org/apache/lucene/index/TestDocValuesIndexing.java index 68d5d696c83..1f6f2603389 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestDocValuesIndexing.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestDocValuesIndexing.java @@ -47,7 +47,9 @@ import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; import org.apache.lucene.index.DocValues.SortedSource; import org.apache.lucene.index.DocValues.Source; +import org.apache.lucene.index.DocValues.SourceCache; import org.apache.lucene.index.DocValues.Type; +import org.apache.lucene.index.DocValues.SourceCache.DirectSourceCache; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.DocIdSetIterator; @@ -576,15 +578,26 @@ public class TestDocValuesIndexing extends LuceneTestCase { } private DocValues getDocValues(IndexReader reader, String field) throws IOException { - return MultiDocValues.getDocValues(reader, field); - } + final DocValues docValues = MultiDocValues.getDocValues(reader, field); + if (docValues == null) { + return docValues; + } + if (rarely()) { + docValues.setCache(new NotCachingSourceCache()); + } else { + if (!(docValues.getCache() instanceof DirectSourceCache)) { + docValues.setCache(new DirectSourceCache()); + } + } + return docValues; + } @SuppressWarnings("fallthrough") private Source getSource(DocValues values) throws IOException { // getSource uses cache internally switch(random().nextInt(5)) { case 3: - return values.load(); + return values.loadSource(); case 2: return values.getDirectSource(); case 1: @@ -764,7 +777,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { w.forceMerge(1); DirectoryReader r = w.getReader(); w.close(); - assertEquals(17, getOnlySegmentReader(r).docValues("field").load().getInt(0)); + assertEquals(17, getOnlySegmentReader(r).docValues("field").loadSource().getInt(0)); r.close(); d.close(); } @@ -791,7 +804,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { w.forceMerge(1); DirectoryReader r = w.getReader(); w.close(); - assertEquals(17, getOnlySegmentReader(r).docValues("field").load().getInt(0)); + assertEquals(17, getOnlySegmentReader(r).docValues("field").loadSource().getInt(0)); r.close(); d.close(); } @@ -1072,4 +1085,24 @@ public class TestDocValuesIndexing extends LuceneTestCase { writer.close(); dir.close(); } + + /** + * + */ + public static class NotCachingSourceCache extends SourceCache { + + @Override + public Source load(DocValues values) throws IOException { + return values.loadSource(); + } + + @Override + public Source loadDirect(DocValues values) throws IOException { + return values.loadDirectSource(); + } + + @Override + public void invalidate(DocValues values) {} + } + } diff --git a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndexNormDocValues.java b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndexNormDocValues.java index 6a695126f4f..77de5126c28 100644 --- a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndexNormDocValues.java +++ b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndexNormDocValues.java @@ -33,12 +33,12 @@ class MemoryIndexNormDocValues extends DocValues { this.source = source; } @Override - public Source load() throws IOException { + protected Source loadSource() throws IOException { return source; } @Override - public Source getDirectSource() throws IOException { + protected Source loadDirectSource() throws IOException { return source; }