LUCENE-4538: Cache DocValues DirectSource

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1406153 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Simon Willnauer 2012-11-06 14:43:57 +00:00
parent d370453c58
commit bf107c5026
17 changed files with 136 additions and 48 deletions

View File

@ -161,6 +161,10 @@ Optimizations
posting lists. All index data is represented as consecutive byte/int arrays to
reduce GC cost and memory overhead. (Simon Willnauer)
* LUCENE-4538: DocValues now caches direct sources in a ThreadLocal exposed via SourceCache.
Users of this API can now simply obtain an instance via DocValues#getDirectSource per thread.
(Simon Willnauer)
Build
* Upgrade randomized testing to version 2.0.4: avoid hangs on shutdown

View File

@ -136,7 +136,7 @@ public class SimpleTextPerDocProducer extends PerDocProducerBase {
}
@Override
public Source load() throws IOException {
protected Source loadSource() throws IOException {
boolean success = false;
IndexInput in = input.clone();
try {
@ -198,9 +198,14 @@ public class SimpleTextPerDocProducer extends PerDocProducerBase {
assert scratch.equals(END);
return reader.getSource();
}
@Override
public Source getDirectSource() throws IOException {
return this.getSource(); // don't cache twice
}
@Override
protected Source loadDirectSource() throws IOException {
return this.getSource();
}

View File

@ -308,7 +308,7 @@ public final class Bytes {
/**
* Opens all necessary files, but does not read any data in until you call
* {@link #load}.
* {@link #loadSource}.
*/
static abstract class BytesReaderBase extends DocValues {
protected final IndexInput idxIn;

View File

@ -79,12 +79,12 @@ class FixedDerefBytesImpl {
}
@Override
public Source load() throws IOException {
protected Source loadSource() throws IOException {
return new FixedDerefSource(cloneData(), cloneIndex(), size, numValuesStored);
}
@Override
public Source getDirectSource()
protected Source loadDirectSource()
throws IOException {
return new DirectFixedDerefSource(cloneData(), cloneIndex(), size, getType());
}

View File

@ -135,13 +135,13 @@ class FixedSortedBytesImpl {
}
@Override
public Source load() throws IOException {
protected Source loadSource() throws IOException {
return new FixedSortedSource(cloneData(), cloneIndex(), size, valueCount,
comparator);
}
@Override
public Source getDirectSource() throws IOException {
protected Source loadDirectSource() throws IOException {
return new DirectFixedSortedSource(cloneData(), cloneIndex(), size,
valueCount, comparator, type);
}

View File

@ -280,7 +280,7 @@ class FixedStraightBytesImpl {
}
@Override
public Source load() throws IOException {
protected Source loadSource() throws IOException {
return size == 1 ? new SingleByteSource(cloneData(), maxDoc) :
new FixedStraightSource(cloneData(), size, maxDoc, type);
}
@ -291,7 +291,7 @@ class FixedStraightBytesImpl {
}
@Override
public Source getDirectSource() throws IOException {
protected Source loadDirectSource() throws IOException {
return new DirectFixedStraightSource(cloneData(), size, getType());
}

View File

@ -125,7 +125,7 @@ public class Floats {
}
@Override
public Source load() throws IOException {
protected Source loadSource() throws IOException {
final IndexInput indexInput = cloneData();
try {
return arrayTemplate.newFromInput(indexInput, maxDoc);

View File

@ -149,7 +149,7 @@ public final class Ints {
}
@Override
public Source load() throws IOException {
protected Source loadSource() throws IOException {
final IndexInput indexInput = cloneData();
try {
return arrayTemplate.newFromInput(indexInput, maxDoc);

View File

@ -149,7 +149,7 @@ class PackedIntValues {
/**
* Opens all necessary files, but does not read any data in until you call
* {@link #load}.
* {@link #loadSource}.
*/
static class PackedIntsReader extends DocValues {
private final IndexInput datIn;
@ -182,7 +182,7 @@ class PackedIntValues {
* already previously loaded but then discarded the Source.
*/
@Override
public Source load() throws IOException {
protected Source loadSource() throws IOException {
boolean success = false;
final Source source;
IndexInput input = null;
@ -217,7 +217,7 @@ class PackedIntValues {
@Override
public Source getDirectSource() throws IOException {
protected Source loadDirectSource() throws IOException {
return values != null ? new FixedStraightBytesImpl.DirectFixedStraightSource(datIn.clone(), 8, Type.FIXED_INTS_64) : new PackedIntsSource(datIn.clone(), true);
}
}

View File

@ -99,12 +99,12 @@ class VarDerefBytesImpl {
}
@Override
public Source load() throws IOException {
protected Source loadSource() throws IOException {
return new VarDerefSource(cloneData(), cloneIndex(), totalBytes);
}
@Override
public Source getDirectSource()
protected Source loadDirectSource()
throws IOException {
return new DirectVarDerefSource(cloneData(), cloneIndex(), getType());
}

View File

@ -161,13 +161,13 @@ final class VarSortedBytesImpl {
}
@Override
public org.apache.lucene.index.DocValues.Source load()
public org.apache.lucene.index.DocValues.Source loadSource()
throws IOException {
return new VarSortedSource(cloneData(), cloneIndex(), comparator);
}
@Override
public Source getDirectSource() throws IOException {
protected Source loadDirectSource() throws IOException {
return new DirectSortedSource(cloneData(), cloneIndex(), comparator, getType());
}

View File

@ -247,12 +247,12 @@ class VarStraightBytesImpl {
}
@Override
public Source load() throws IOException {
protected Source loadSource() throws IOException {
return new VarStraightSource(cloneData(), cloneIndex());
}
@Override
public Source getDirectSource()
protected Source loadDirectSource()
throws IOException {
return new DirectVarStraightSource(cloneData(), cloneIndex(), getType());
}

View File

@ -33,8 +33,8 @@ import org.apache.lucene.document.PackedLongDocValuesField; // javadocs
import org.apache.lucene.document.ShortDocValuesField; // javadocs
import org.apache.lucene.document.SortedBytesDocValuesField; // javadocs
import org.apache.lucene.document.StraightBytesDocValuesField; // javadocs
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CloseableThreadLocal;
import org.apache.lucene.util.packed.PackedInts;
/**
@ -95,7 +95,6 @@ public abstract class DocValues implements Closeable {
private volatile SourceCache cache = new SourceCache.DirectSourceCache();
private final Object cacheLock = new Object();
/** Sole constructor. (For invocation by subclass
* constructors, typically implicit.) */
protected DocValues() {
@ -112,12 +111,12 @@ public abstract class DocValues implements Closeable {
* @see #getSource()
* @see #setCache(SourceCache)
*/
public abstract Source load() throws IOException;
protected abstract Source loadSource() throws IOException;
/**
* Returns a {@link Source} instance through the current {@link SourceCache}.
* Iff no {@link Source} has been loaded into the cache so far the source will
* be loaded through {@link #load()} and passed to the {@link SourceCache}.
* be loaded through {@link #loadSource()} and passed to the {@link SourceCache}.
* The caller of this method should not close the obtained {@link Source}
* instance unless it is not needed for the rest of its life time.
* <p>
@ -129,12 +128,30 @@ public abstract class DocValues implements Closeable {
public Source getSource() throws IOException {
return cache.load(this);
}
/**
* Returns a disk resident {@link Source} instance through the current
* {@link SourceCache}. Direct Sources are cached per thread in the
* {@link SourceCache}. The obtained instance should not be shared with other
* threads.
*/
public Source getDirectSource() throws IOException {
return this.cache.loadDirect(this);
}
/**
* Returns a disk resident {@link Source} instance. Direct Sources are not
* cached in the {@link SourceCache} and should not be shared between threads.
* Loads a new {@link Source direct source} instance from this {@link DocValues} field
* instance. Source instances returned from this method are not cached. It is
* the callers responsibility to maintain the instance and release its
* resources once the source is not needed anymore.
* <p>
* For managed {@link Source direct source} instances see {@link #getDirectSource()}.
*
* @see #getDirectSource()
* @see #setCache(SourceCache)
*/
public abstract Source getDirectSource() throws IOException;
protected abstract Source loadDirectSource() throws IOException;
/**
* Returns the {@link Type} of this {@link DocValues} instance
@ -163,10 +180,10 @@ public abstract class DocValues implements Closeable {
/**
* Sets the {@link SourceCache} used by this {@link DocValues} instance. This
* method should be called before {@link #load()} is called. All {@link Source} instances in the currently used cache will be closed
* method should be called before {@link #loadSource()} is called. All {@link Source} instances in the currently used cache will be closed
* before the new cache is installed.
* <p>
* Note: All instances previously obtained from {@link #load()} will be lost.
* Note: All instances previously obtained from {@link #loadSource()} will be lost.
*
* @throws IllegalArgumentException
* if the given cache is <code>null</code>
@ -181,6 +198,14 @@ public abstract class DocValues implements Closeable {
toClose.close(this);
}
}
/**
* Returns the currently used cache instance;
* @see #setCache(SourceCache)
*/
// for tests
SourceCache getCache() {
return cache;
}
/**
* Source of per document values like long, double or {@link BytesRef}
@ -687,9 +712,9 @@ public abstract class DocValues implements Closeable {
/**
* Abstract base class for {@link DocValues} {@link Source} cache.
* <p>
* {@link Source} instances loaded via {@link DocValues#load()} are entirely memory resident
* {@link Source} instances loaded via {@link DocValues#loadSource()} are entirely memory resident
* and need to be maintained by the caller. Each call to
* {@link DocValues#load()} will cause an entire reload of
* {@link DocValues#loadSource()} will cause an entire reload of
* the underlying data. Source instances obtained from
* {@link DocValues#getSource()} and {@link DocValues#getSource()}
* respectively are maintained by a {@link SourceCache} that is closed (
@ -721,6 +746,15 @@ public abstract class DocValues implements Closeable {
* This method will not return <code>null</code>
*/
public abstract Source load(DocValues values) throws IOException;
/**
* Atomically loads a {@link Source direct source} into the per-thread cache from the given
* {@link DocValues} and returns it iff no other {@link Source direct source} has already
* been cached. Otherwise the cached source is returned.
* <p>
* This method will not return <code>null</code>
*/
public abstract Source loadDirect(DocValues values) throws IOException;
/**
* Atomically invalidates the cached {@link Source}
@ -744,20 +778,34 @@ public abstract class DocValues implements Closeable {
*/
public static final class DirectSourceCache extends SourceCache {
private Source ref;
private final CloseableThreadLocal<Source> directSourceCache = new CloseableThreadLocal<Source>();
/** Sole constructor. */
public DirectSourceCache() {
}
public synchronized Source load(DocValues values) throws IOException {
if (ref == null) {
ref = values.load();
ref = values.loadSource();
}
return ref;
}
public synchronized void invalidate(DocValues values) {
ref = null;
directSourceCache.close();
}
@Override
public synchronized Source loadDirect(DocValues values) throws IOException {
final Source source = directSourceCache.get();
if (source == null) {
final Source loadDirectSource = values.loadDirectSource();
directSourceCache.set(loadDirectSource);
return loadDirectSource;
} else {
return source;
}
}
}
}

View File

@ -185,7 +185,7 @@ class MultiDocValues extends DocValues {
}
@Override
public Source load() throws IOException {
protected Source loadSource() throws IOException {
return new MultiSource(slices, starts, false, type);
}
@ -199,7 +199,7 @@ class MultiDocValues extends DocValues {
}
@Override
public Source load() throws IOException {
protected Source loadSource() throws IOException {
return emptySource;
}
@ -209,7 +209,7 @@ class MultiDocValues extends DocValues {
}
@Override
public Source getDirectSource() throws IOException {
protected Source loadDirectSource() throws IOException {
return emptySource;
}
}
@ -226,7 +226,7 @@ class MultiDocValues extends DocValues {
}
@Override
public Source load() throws IOException {
protected Source loadSource() throws IOException {
return emptyFixedSource;
}
@ -241,7 +241,7 @@ class MultiDocValues extends DocValues {
}
@Override
public Source getDirectSource() throws IOException {
protected Source loadDirectSource() throws IOException {
return emptyFixedSource;
}
}
@ -594,7 +594,7 @@ class MultiDocValues extends DocValues {
}
@Override
public Source getDirectSource() throws IOException {
protected Source loadDirectSource() throws IOException {
return new MultiSource(slices, starts, true, type);
}

View File

@ -425,8 +425,6 @@ public class TestDocValues extends LuceneTestCase {
private Source getSource(DocValues values) throws IOException {
// getSource uses cache internally
switch(random().nextInt(5)) {
case 3:
return values.load();
case 2:
return values.getDirectSource();
case 1:

View File

@ -47,7 +47,9 @@ import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DocValues.SortedSource;
import org.apache.lucene.index.DocValues.Source;
import org.apache.lucene.index.DocValues.SourceCache;
import org.apache.lucene.index.DocValues.Type;
import org.apache.lucene.index.DocValues.SourceCache.DirectSourceCache;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.DocIdSetIterator;
@ -576,15 +578,26 @@ public class TestDocValuesIndexing extends LuceneTestCase {
}
private DocValues getDocValues(IndexReader reader, String field) throws IOException {
return MultiDocValues.getDocValues(reader, field);
}
final DocValues docValues = MultiDocValues.getDocValues(reader, field);
if (docValues == null) {
return docValues;
}
if (rarely()) {
docValues.setCache(new NotCachingSourceCache());
} else {
if (!(docValues.getCache() instanceof DirectSourceCache)) {
docValues.setCache(new DirectSourceCache());
}
}
return docValues;
}
@SuppressWarnings("fallthrough")
private Source getSource(DocValues values) throws IOException {
// getSource uses cache internally
switch(random().nextInt(5)) {
case 3:
return values.load();
return values.loadSource();
case 2:
return values.getDirectSource();
case 1:
@ -764,7 +777,7 @@ public class TestDocValuesIndexing extends LuceneTestCase {
w.forceMerge(1);
DirectoryReader r = w.getReader();
w.close();
assertEquals(17, getOnlySegmentReader(r).docValues("field").load().getInt(0));
assertEquals(17, getOnlySegmentReader(r).docValues("field").loadSource().getInt(0));
r.close();
d.close();
}
@ -791,7 +804,7 @@ public class TestDocValuesIndexing extends LuceneTestCase {
w.forceMerge(1);
DirectoryReader r = w.getReader();
w.close();
assertEquals(17, getOnlySegmentReader(r).docValues("field").load().getInt(0));
assertEquals(17, getOnlySegmentReader(r).docValues("field").loadSource().getInt(0));
r.close();
d.close();
}
@ -1072,4 +1085,24 @@ public class TestDocValuesIndexing extends LuceneTestCase {
writer.close();
dir.close();
}
/**
*
*/
public static class NotCachingSourceCache extends SourceCache {
@Override
public Source load(DocValues values) throws IOException {
return values.loadSource();
}
@Override
public Source loadDirect(DocValues values) throws IOException {
return values.loadDirectSource();
}
@Override
public void invalidate(DocValues values) {}
}
}

View File

@ -33,12 +33,12 @@ class MemoryIndexNormDocValues extends DocValues {
this.source = source;
}
@Override
public Source load() throws IOException {
protected Source loadSource() throws IOException {
return source;
}
@Override
public Source getDirectSource() throws IOException {
protected Source loadDirectSource() throws IOException {
return source;
}