LUCENE-4538: Cache DocValues DirectSource

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1406153 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Simon Willnauer 2012-11-06 14:43:57 +00:00
parent d370453c58
commit bf107c5026
17 changed files with 136 additions and 48 deletions

View File

@ -161,6 +161,10 @@ Optimizations
posting lists. All index data is represented as consecutive byte/int arrays to posting lists. All index data is represented as consecutive byte/int arrays to
reduce GC cost and memory overhead. (Simon Willnauer) reduce GC cost and memory overhead. (Simon Willnauer)
* LUCENE-4538: DocValues now caches direct sources in a ThreadLocal exposed via SourceCache.
Users of this API can now simply obtain a per-thread instance via DocValues#getDirectSource.
(Simon Willnauer)
Build Build
* Upgrade randomized testing to version 2.0.4: avoid hangs on shutdown * Upgrade randomized testing to version 2.0.4: avoid hangs on shutdown

View File

@ -136,7 +136,7 @@ public class SimpleTextPerDocProducer extends PerDocProducerBase {
} }
@Override @Override
public Source load() throws IOException { protected Source loadSource() throws IOException {
boolean success = false; boolean success = false;
IndexInput in = input.clone(); IndexInput in = input.clone();
try { try {
@ -198,9 +198,14 @@ public class SimpleTextPerDocProducer extends PerDocProducerBase {
assert scratch.equals(END); assert scratch.equals(END);
return reader.getSource(); return reader.getSource();
} }
@Override @Override
public Source getDirectSource() throws IOException { public Source getDirectSource() throws IOException {
return this.getSource(); // don't cache twice
}
@Override
protected Source loadDirectSource() throws IOException {
return this.getSource(); return this.getSource();
} }

View File

@ -308,7 +308,7 @@ public final class Bytes {
/** /**
* Opens all necessary files, but does not read any data in until you call * Opens all necessary files, but does not read any data in until you call
* {@link #load}. * {@link #loadSource}.
*/ */
static abstract class BytesReaderBase extends DocValues { static abstract class BytesReaderBase extends DocValues {
protected final IndexInput idxIn; protected final IndexInput idxIn;

View File

@ -79,12 +79,12 @@ class FixedDerefBytesImpl {
} }
@Override @Override
public Source load() throws IOException { protected Source loadSource() throws IOException {
return new FixedDerefSource(cloneData(), cloneIndex(), size, numValuesStored); return new FixedDerefSource(cloneData(), cloneIndex(), size, numValuesStored);
} }
@Override @Override
public Source getDirectSource() protected Source loadDirectSource()
throws IOException { throws IOException {
return new DirectFixedDerefSource(cloneData(), cloneIndex(), size, getType()); return new DirectFixedDerefSource(cloneData(), cloneIndex(), size, getType());
} }

View File

@ -135,13 +135,13 @@ class FixedSortedBytesImpl {
} }
@Override @Override
public Source load() throws IOException { protected Source loadSource() throws IOException {
return new FixedSortedSource(cloneData(), cloneIndex(), size, valueCount, return new FixedSortedSource(cloneData(), cloneIndex(), size, valueCount,
comparator); comparator);
} }
@Override @Override
public Source getDirectSource() throws IOException { protected Source loadDirectSource() throws IOException {
return new DirectFixedSortedSource(cloneData(), cloneIndex(), size, return new DirectFixedSortedSource(cloneData(), cloneIndex(), size,
valueCount, comparator, type); valueCount, comparator, type);
} }

View File

@ -280,7 +280,7 @@ class FixedStraightBytesImpl {
} }
@Override @Override
public Source load() throws IOException { protected Source loadSource() throws IOException {
return size == 1 ? new SingleByteSource(cloneData(), maxDoc) : return size == 1 ? new SingleByteSource(cloneData(), maxDoc) :
new FixedStraightSource(cloneData(), size, maxDoc, type); new FixedStraightSource(cloneData(), size, maxDoc, type);
} }
@ -291,7 +291,7 @@ class FixedStraightBytesImpl {
} }
@Override @Override
public Source getDirectSource() throws IOException { protected Source loadDirectSource() throws IOException {
return new DirectFixedStraightSource(cloneData(), size, getType()); return new DirectFixedStraightSource(cloneData(), size, getType());
} }

View File

@ -125,7 +125,7 @@ public class Floats {
} }
@Override @Override
public Source load() throws IOException { protected Source loadSource() throws IOException {
final IndexInput indexInput = cloneData(); final IndexInput indexInput = cloneData();
try { try {
return arrayTemplate.newFromInput(indexInput, maxDoc); return arrayTemplate.newFromInput(indexInput, maxDoc);

View File

@ -149,7 +149,7 @@ public final class Ints {
} }
@Override @Override
public Source load() throws IOException { protected Source loadSource() throws IOException {
final IndexInput indexInput = cloneData(); final IndexInput indexInput = cloneData();
try { try {
return arrayTemplate.newFromInput(indexInput, maxDoc); return arrayTemplate.newFromInput(indexInput, maxDoc);

View File

@ -149,7 +149,7 @@ class PackedIntValues {
/** /**
* Opens all necessary files, but does not read any data in until you call * Opens all necessary files, but does not read any data in until you call
* {@link #load}. * {@link #loadSource}.
*/ */
static class PackedIntsReader extends DocValues { static class PackedIntsReader extends DocValues {
private final IndexInput datIn; private final IndexInput datIn;
@ -182,7 +182,7 @@ class PackedIntValues {
* already previously loaded but then discarded the Source. * already previously loaded but then discarded the Source.
*/ */
@Override @Override
public Source load() throws IOException { protected Source loadSource() throws IOException {
boolean success = false; boolean success = false;
final Source source; final Source source;
IndexInput input = null; IndexInput input = null;
@ -217,7 +217,7 @@ class PackedIntValues {
@Override @Override
public Source getDirectSource() throws IOException { protected Source loadDirectSource() throws IOException {
return values != null ? new FixedStraightBytesImpl.DirectFixedStraightSource(datIn.clone(), 8, Type.FIXED_INTS_64) : new PackedIntsSource(datIn.clone(), true); return values != null ? new FixedStraightBytesImpl.DirectFixedStraightSource(datIn.clone(), 8, Type.FIXED_INTS_64) : new PackedIntsSource(datIn.clone(), true);
} }
} }

View File

@ -99,12 +99,12 @@ class VarDerefBytesImpl {
} }
@Override @Override
public Source load() throws IOException { protected Source loadSource() throws IOException {
return new VarDerefSource(cloneData(), cloneIndex(), totalBytes); return new VarDerefSource(cloneData(), cloneIndex(), totalBytes);
} }
@Override @Override
public Source getDirectSource() protected Source loadDirectSource()
throws IOException { throws IOException {
return new DirectVarDerefSource(cloneData(), cloneIndex(), getType()); return new DirectVarDerefSource(cloneData(), cloneIndex(), getType());
} }

View File

@ -161,13 +161,13 @@ final class VarSortedBytesImpl {
} }
@Override @Override
public org.apache.lucene.index.DocValues.Source load() public org.apache.lucene.index.DocValues.Source loadSource()
throws IOException { throws IOException {
return new VarSortedSource(cloneData(), cloneIndex(), comparator); return new VarSortedSource(cloneData(), cloneIndex(), comparator);
} }
@Override @Override
public Source getDirectSource() throws IOException { protected Source loadDirectSource() throws IOException {
return new DirectSortedSource(cloneData(), cloneIndex(), comparator, getType()); return new DirectSortedSource(cloneData(), cloneIndex(), comparator, getType());
} }

View File

@ -247,12 +247,12 @@ class VarStraightBytesImpl {
} }
@Override @Override
public Source load() throws IOException { protected Source loadSource() throws IOException {
return new VarStraightSource(cloneData(), cloneIndex()); return new VarStraightSource(cloneData(), cloneIndex());
} }
@Override @Override
public Source getDirectSource() protected Source loadDirectSource()
throws IOException { throws IOException {
return new DirectVarStraightSource(cloneData(), cloneIndex(), getType()); return new DirectVarStraightSource(cloneData(), cloneIndex(), getType());
} }

View File

@ -33,8 +33,8 @@ import org.apache.lucene.document.PackedLongDocValuesField; // javadocs
import org.apache.lucene.document.ShortDocValuesField; // javadocs import org.apache.lucene.document.ShortDocValuesField; // javadocs
import org.apache.lucene.document.SortedBytesDocValuesField; // javadocs import org.apache.lucene.document.SortedBytesDocValuesField; // javadocs
import org.apache.lucene.document.StraightBytesDocValuesField; // javadocs import org.apache.lucene.document.StraightBytesDocValuesField; // javadocs
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CloseableThreadLocal;
import org.apache.lucene.util.packed.PackedInts; import org.apache.lucene.util.packed.PackedInts;
/** /**
@ -95,7 +95,6 @@ public abstract class DocValues implements Closeable {
private volatile SourceCache cache = new SourceCache.DirectSourceCache(); private volatile SourceCache cache = new SourceCache.DirectSourceCache();
private final Object cacheLock = new Object(); private final Object cacheLock = new Object();
/** Sole constructor. (For invocation by subclass /** Sole constructor. (For invocation by subclass
* constructors, typically implicit.) */ * constructors, typically implicit.) */
protected DocValues() { protected DocValues() {
@ -112,12 +111,12 @@ public abstract class DocValues implements Closeable {
* @see #getSource() * @see #getSource()
* @see #setCache(SourceCache) * @see #setCache(SourceCache)
*/ */
public abstract Source load() throws IOException; protected abstract Source loadSource() throws IOException;
/** /**
* Returns a {@link Source} instance through the current {@link SourceCache}. * Returns a {@link Source} instance through the current {@link SourceCache}.
* Iff no {@link Source} has been loaded into the cache so far the source will * Iff no {@link Source} has been loaded into the cache so far the source will
* be loaded through {@link #load()} and passed to the {@link SourceCache}. * be loaded through {@link #loadSource()} and passed to the {@link SourceCache}.
* The caller of this method should not close the obtained {@link Source} * The caller of this method should not close the obtained {@link Source}
* instance unless it is not needed for the rest of its life time. * instance unless it is not needed for the rest of its life time.
* <p> * <p>
@ -129,12 +128,30 @@ public abstract class DocValues implements Closeable {
public Source getSource() throws IOException { public Source getSource() throws IOException {
return cache.load(this); return cache.load(this);
} }
/**
* Returns a disk resident {@link Source} instance through the current
* {@link SourceCache}. Direct Sources are cached per thread in the
* {@link SourceCache}. The obtained instance should not be shared with other
* threads.
*/
public Source getDirectSource() throws IOException {
return this.cache.loadDirect(this);
}
/** /**
* Returns a disk resident {@link Source} instance. Direct Sources are not * Loads a new {@link Source direct source} instance from this {@link DocValues} field
* cached in the {@link SourceCache} and should not be shared between threads. * instance. Source instances returned from this method are not cached. It is
* the caller's responsibility to maintain the instance and release its
* resources once the source is not needed anymore.
* <p>
* For managed {@link Source direct source} instances see {@link #getDirectSource()}.
*
* @see #getDirectSource()
* @see #setCache(SourceCache)
*/ */
public abstract Source getDirectSource() throws IOException; protected abstract Source loadDirectSource() throws IOException;
/** /**
* Returns the {@link Type} of this {@link DocValues} instance * Returns the {@link Type} of this {@link DocValues} instance
@ -163,10 +180,10 @@ public abstract class DocValues implements Closeable {
/** /**
* Sets the {@link SourceCache} used by this {@link DocValues} instance. This * Sets the {@link SourceCache} used by this {@link DocValues} instance. This
* method should be called before {@link #load()} is called. All {@link Source} instances in the currently used cache will be closed * method should be called before {@link #loadSource()} is called. All {@link Source} instances in the currently used cache will be closed
* before the new cache is installed. * before the new cache is installed.
* <p> * <p>
* Note: All instances previously obtained from {@link #load()} will be lost. * Note: All instances previously obtained from {@link #loadSource()} will be lost.
* *
* @throws IllegalArgumentException * @throws IllegalArgumentException
* if the given cache is <code>null</code> * if the given cache is <code>null</code>
@ -181,6 +198,14 @@ public abstract class DocValues implements Closeable {
toClose.close(this); toClose.close(this);
} }
} }
/**
* Returns the currently used cache instance.
* @see #setCache(SourceCache)
*/
// for tests
SourceCache getCache() {
return cache;
}
/** /**
* Source of per document values like long, double or {@link BytesRef} * Source of per document values like long, double or {@link BytesRef}
@ -687,9 +712,9 @@ public abstract class DocValues implements Closeable {
/** /**
* Abstract base class for {@link DocValues} {@link Source} cache. * Abstract base class for {@link DocValues} {@link Source} cache.
* <p> * <p>
* {@link Source} instances loaded via {@link DocValues#load()} are entirely memory resident * {@link Source} instances loaded via {@link DocValues#loadSource()} are entirely memory resident
* and need to be maintained by the caller. Each call to * and need to be maintained by the caller. Each call to
* {@link DocValues#load()} will cause an entire reload of * {@link DocValues#loadSource()} will cause an entire reload of
* the underlying data. Source instances obtained from * the underlying data. Source instances obtained from
* {@link DocValues#getSource()} and {@link DocValues#getDirectSource()} * {@link DocValues#getSource()} and {@link DocValues#getDirectSource()}
* respectively are maintained by a {@link SourceCache} that is closed ( * respectively are maintained by a {@link SourceCache} that is closed (
@ -721,6 +746,15 @@ public abstract class DocValues implements Closeable {
* This method will not return <code>null</code> * This method will not return <code>null</code>
*/ */
public abstract Source load(DocValues values) throws IOException; public abstract Source load(DocValues values) throws IOException;
/**
* Atomically loads a {@link Source direct source} into the per-thread cache from the given
* {@link DocValues} and returns it iff no other {@link Source direct source} has already
* been cached. Otherwise the cached source is returned.
* <p>
* This method will not return <code>null</code>
*/
public abstract Source loadDirect(DocValues values) throws IOException;
/** /**
* Atomically invalidates the cached {@link Source} * Atomically invalidates the cached {@link Source}
@ -744,20 +778,34 @@ public abstract class DocValues implements Closeable {
*/ */
public static final class DirectSourceCache extends SourceCache { public static final class DirectSourceCache extends SourceCache {
private Source ref; private Source ref;
private final CloseableThreadLocal<Source> directSourceCache = new CloseableThreadLocal<Source>();
/** Sole constructor. */ /** Sole constructor. */
public DirectSourceCache() { public DirectSourceCache() {
} }
public synchronized Source load(DocValues values) throws IOException { public synchronized Source load(DocValues values) throws IOException {
if (ref == null) { if (ref == null) {
ref = values.load(); ref = values.loadSource();
} }
return ref; return ref;
} }
public synchronized void invalidate(DocValues values) { public synchronized void invalidate(DocValues values) {
ref = null; ref = null;
directSourceCache.close();
}
@Override
public synchronized Source loadDirect(DocValues values) throws IOException {
final Source source = directSourceCache.get();
if (source == null) {
final Source loadDirectSource = values.loadDirectSource();
directSourceCache.set(loadDirectSource);
return loadDirectSource;
} else {
return source;
}
} }
} }
} }

View File

@ -185,7 +185,7 @@ class MultiDocValues extends DocValues {
} }
@Override @Override
public Source load() throws IOException { protected Source loadSource() throws IOException {
return new MultiSource(slices, starts, false, type); return new MultiSource(slices, starts, false, type);
} }
@ -199,7 +199,7 @@ class MultiDocValues extends DocValues {
} }
@Override @Override
public Source load() throws IOException { protected Source loadSource() throws IOException {
return emptySource; return emptySource;
} }
@ -209,7 +209,7 @@ class MultiDocValues extends DocValues {
} }
@Override @Override
public Source getDirectSource() throws IOException { protected Source loadDirectSource() throws IOException {
return emptySource; return emptySource;
} }
} }
@ -226,7 +226,7 @@ class MultiDocValues extends DocValues {
} }
@Override @Override
public Source load() throws IOException { protected Source loadSource() throws IOException {
return emptyFixedSource; return emptyFixedSource;
} }
@ -241,7 +241,7 @@ class MultiDocValues extends DocValues {
} }
@Override @Override
public Source getDirectSource() throws IOException { protected Source loadDirectSource() throws IOException {
return emptyFixedSource; return emptyFixedSource;
} }
} }
@ -594,7 +594,7 @@ class MultiDocValues extends DocValues {
} }
@Override @Override
public Source getDirectSource() throws IOException { protected Source loadDirectSource() throws IOException {
return new MultiSource(slices, starts, true, type); return new MultiSource(slices, starts, true, type);
} }

View File

@ -425,8 +425,6 @@ public class TestDocValues extends LuceneTestCase {
private Source getSource(DocValues values) throws IOException { private Source getSource(DocValues values) throws IOException {
// getSource uses cache internally // getSource uses cache internally
switch(random().nextInt(5)) { switch(random().nextInt(5)) {
case 3:
return values.load();
case 2: case 2:
return values.getDirectSource(); return values.getDirectSource();
case 1: case 1:

View File

@ -47,7 +47,9 @@ import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField; import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DocValues.SortedSource; import org.apache.lucene.index.DocValues.SortedSource;
import org.apache.lucene.index.DocValues.Source; import org.apache.lucene.index.DocValues.Source;
import org.apache.lucene.index.DocValues.SourceCache;
import org.apache.lucene.index.DocValues.Type; import org.apache.lucene.index.DocValues.Type;
import org.apache.lucene.index.DocValues.SourceCache.DirectSourceCache;
import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.DocIdSetIterator;
@ -576,15 +578,26 @@ public class TestDocValuesIndexing extends LuceneTestCase {
} }
private DocValues getDocValues(IndexReader reader, String field) throws IOException { private DocValues getDocValues(IndexReader reader, String field) throws IOException {
return MultiDocValues.getDocValues(reader, field); final DocValues docValues = MultiDocValues.getDocValues(reader, field);
} if (docValues == null) {
return docValues;
}
if (rarely()) {
docValues.setCache(new NotCachingSourceCache());
} else {
if (!(docValues.getCache() instanceof DirectSourceCache)) {
docValues.setCache(new DirectSourceCache());
}
}
return docValues;
}
@SuppressWarnings("fallthrough") @SuppressWarnings("fallthrough")
private Source getSource(DocValues values) throws IOException { private Source getSource(DocValues values) throws IOException {
// getSource uses cache internally // getSource uses cache internally
switch(random().nextInt(5)) { switch(random().nextInt(5)) {
case 3: case 3:
return values.load(); return values.loadSource();
case 2: case 2:
return values.getDirectSource(); return values.getDirectSource();
case 1: case 1:
@ -764,7 +777,7 @@ public class TestDocValuesIndexing extends LuceneTestCase {
w.forceMerge(1); w.forceMerge(1);
DirectoryReader r = w.getReader(); DirectoryReader r = w.getReader();
w.close(); w.close();
assertEquals(17, getOnlySegmentReader(r).docValues("field").load().getInt(0)); assertEquals(17, getOnlySegmentReader(r).docValues("field").loadSource().getInt(0));
r.close(); r.close();
d.close(); d.close();
} }
@ -791,7 +804,7 @@ public class TestDocValuesIndexing extends LuceneTestCase {
w.forceMerge(1); w.forceMerge(1);
DirectoryReader r = w.getReader(); DirectoryReader r = w.getReader();
w.close(); w.close();
assertEquals(17, getOnlySegmentReader(r).docValues("field").load().getInt(0)); assertEquals(17, getOnlySegmentReader(r).docValues("field").loadSource().getInt(0));
r.close(); r.close();
d.close(); d.close();
} }
@ -1072,4 +1085,24 @@ public class TestDocValuesIndexing extends LuceneTestCase {
writer.close(); writer.close();
dir.close(); dir.close();
} }
/**
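* A {@link SourceCache} that caches nothing: every call loads a fresh {@link Source} from the given {@link DocValues}, and invalidation is a no-op.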
*/
public static class NotCachingSourceCache extends SourceCache {
@Override
public Source load(DocValues values) throws IOException {
return values.loadSource();
}
@Override
public Source loadDirect(DocValues values) throws IOException {
return values.loadDirectSource();
}
@Override
public void invalidate(DocValues values) {}
}
} }

View File

@ -33,12 +33,12 @@ class MemoryIndexNormDocValues extends DocValues {
this.source = source; this.source = source;
} }
@Override @Override
public Source load() throws IOException { protected Source loadSource() throws IOException {
return source; return source;
} }
@Override @Override
public Source getDirectSource() throws IOException { protected Source loadDirectSource() throws IOException {
return source; return source;
} }