LUCENE-1609: remove synchronization from terms dict by loading index up front

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@793104 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2009-07-10 20:48:36 +00:00
parent 4a63ea0f6c
commit a284608778
12 changed files with 336 additions and 211 deletions

View File

@ -295,6 +295,14 @@ API Changes
new ctors taking field and term ranges as Strings (see also
LUCENE-1424). (Uwe Schindler)
29. LUCENE-1609: The termInfosIndexDivisor must now be specified
up-front when opening the IndexReader. Attempts to call
IndexReader.setTermInfosIndexDivisor will hit an
UnsupportedOperationException. This was done to enable removal of
all synchronization in TermInfosReader, which previously could
cause threads to pile up in certain cases. (Dan Rosher via Mike
McCandless)
Bug fixes
1. LUCENE-1415: MultiPhraseQuery has incorrect hashCode() and equals()
@ -361,6 +369,9 @@ Bug fixes
on this functionality and does not work correctly without it.
(Billow Gao, Mark Miller)
18. LUCENE-1718: Fix termInfosIndexDivisor to carry over to reopened
readers (Mike McCandless)
New features
1. LUCENE-1411: Added expert API to open an IndexWriter on a prior

View File

@ -42,7 +42,7 @@
<property name="Name" value="Lucene"/>
<property name="dev.version" value="2.9-dev"/>
<property name="version" value="${dev.version}"/>
<property name="compatibility.tag" value="lucene_2_4_back_compat_tests_20090710"/>
<property name="compatibility.tag" value="lucene_2_4_back_compat_tests_20090710a"/>
<property name="spec.version" value="${version}"/>
<property name="year" value="2000-${current.year}"/>
<property name="final.name" value="lucene-${name}-${version}"/>

View File

@ -51,6 +51,7 @@ class DirectoryReader extends IndexReader implements Cloneable {
private Lock writeLock;
private SegmentInfos segmentInfos;
private boolean stale;
private final int termInfosIndexDivisor;
private boolean rollbackHasChanges;
private SegmentInfos rollbackSegmentInfos;
@ -62,25 +63,27 @@ class DirectoryReader extends IndexReader implements Cloneable {
private int numDocs = -1;
private boolean hasDeletions = false;
static IndexReader open(final Directory directory, final IndexDeletionPolicy deletionPolicy, final IndexCommit commit, final boolean readOnly) throws CorruptIndexException, IOException {
static IndexReader open(final Directory directory, final IndexDeletionPolicy deletionPolicy, final IndexCommit commit, final boolean readOnly,
final int termInfosIndexDivisor) throws CorruptIndexException, IOException {
return (IndexReader) new SegmentInfos.FindSegmentsFile(directory) {
protected Object doBody(String segmentFileName) throws CorruptIndexException, IOException {
SegmentInfos infos = new SegmentInfos();
infos.read(directory, segmentFileName);
if (readOnly)
return new ReadOnlyDirectoryReader(directory, infos, deletionPolicy);
return new ReadOnlyDirectoryReader(directory, infos, deletionPolicy, termInfosIndexDivisor);
else
return new DirectoryReader(directory, infos, deletionPolicy, false);
return new DirectoryReader(directory, infos, deletionPolicy, false, termInfosIndexDivisor);
}
}.run(commit);
}
/** Construct reading the named set of readers. */
DirectoryReader(Directory directory, SegmentInfos sis, IndexDeletionPolicy deletionPolicy, boolean readOnly) throws IOException {
DirectoryReader(Directory directory, SegmentInfos sis, IndexDeletionPolicy deletionPolicy, boolean readOnly, int termInfosIndexDivisor) throws IOException {
this.directory = directory;
this.readOnly = readOnly;
this.segmentInfos = sis;
this.deletionPolicy = deletionPolicy;
this.termInfosIndexDivisor = termInfosIndexDivisor;
if (!readOnly) {
// We assume that this segments_N was previously
@ -97,7 +100,7 @@ class DirectoryReader extends IndexReader implements Cloneable {
for (int i = sis.size()-1; i >= 0; i--) {
boolean success = false;
try {
readers[i] = SegmentReader.get(readOnly, sis.info(i));
readers[i] = SegmentReader.get(readOnly, sis.info(i), termInfosIndexDivisor);
success = true;
} finally {
if (!success) {
@ -117,10 +120,11 @@ class DirectoryReader extends IndexReader implements Cloneable {
}
// Used by near real-time search
DirectoryReader(IndexWriter writer, SegmentInfos infos) throws IOException {
DirectoryReader(IndexWriter writer, SegmentInfos infos, int termInfosIndexDivisor) throws IOException {
this.directory = writer.getDirectory();
this.readOnly = true;
this.segmentInfos = infos;
this.termInfosIndexDivisor = termInfosIndexDivisor;
if (!readOnly) {
// We assume that this segments_N was previously
// properly sync'd:
@ -140,7 +144,7 @@ class DirectoryReader extends IndexReader implements Cloneable {
try {
final SegmentInfo info = infos.info(upto);
if (info.dir == dir) {
readers[upto++] = writer.readerPool.getReadOnlyClone(info, true);
readers[upto++] = writer.readerPool.getReadOnlyClone(info, true, termInfosIndexDivisor);
}
success = true;
} finally {
@ -171,10 +175,11 @@ class DirectoryReader extends IndexReader implements Cloneable {
/** This constructor is only used for {@link #reopen()} */
DirectoryReader(Directory directory, SegmentInfos infos, SegmentReader[] oldReaders, int[] oldStarts,
Map oldNormsCache, boolean readOnly, boolean doClone) throws IOException {
Map oldNormsCache, boolean readOnly, boolean doClone, int termInfosIndexDivisor) throws IOException {
this.directory = directory;
this.readOnly = readOnly;
this.segmentInfos = infos;
this.termInfosIndexDivisor = termInfosIndexDivisor;
if (!readOnly) {
// We assume that this segments_N was previously
// properly sync'd:
@ -218,7 +223,7 @@ class DirectoryReader extends IndexReader implements Cloneable {
assert !doClone;
// this is a new reader; in case we hit an exception we can close it safely
newReader = SegmentReader.get(readOnly, infos.info(i));
newReader = SegmentReader.get(readOnly, infos.info(i), termInfosIndexDivisor);
} else {
newReader = newReaders[i].reopenSegment(infos.info(i), doClone, readOnly);
}
@ -426,10 +431,10 @@ class DirectoryReader extends IndexReader implements Cloneable {
private synchronized DirectoryReader doReopen(SegmentInfos infos, boolean doClone, boolean openReadOnly) throws CorruptIndexException, IOException {
DirectoryReader reader;
if (openReadOnly) {
reader = new ReadOnlyDirectoryReader(directory, infos, subReaders, starts, normsCache, doClone);
if (openReadOnly) {
reader = new ReadOnlyDirectoryReader(directory, infos, subReaders, starts, normsCache, doClone, termInfosIndexDivisor);
} else {
reader = new DirectoryReader(directory, infos, subReaders, starts, normsCache, false, doClone);
reader = new DirectoryReader(directory, infos, subReaders, starts, normsCache, false, doClone, termInfosIndexDivisor);
}
reader.setDisableFakeNorms(getDisableFakeNorms());
return reader;
@ -827,18 +832,6 @@ class DirectoryReader extends IndexReader implements Cloneable {
return subReaders;
}
public void setTermInfosIndexDivisor(int indexDivisor) throws IllegalStateException {
for (int i = 0; i < subReaders.length; i++)
subReaders[i].setTermInfosIndexDivisor(indexDivisor);
}
public int getTermInfosIndexDivisor() throws IllegalStateException {
if (subReaders.length > 0)
return subReaders[0].getTermInfosIndexDivisor();
else
throw new IllegalStateException("no readers");
}
public void setDisableFakeNorms(boolean disableFakeNorms) {
super.setDisableFakeNorms(disableFakeNorms);
for (int i = 0; i < subReaders.length; i++)

View File

@ -115,6 +115,8 @@ public abstract class IndexReader implements Cloneable {
private int refCount;
static int DEFAULT_TERMS_INDEX_DIVISOR = 1;
private boolean disableFakeNorms = false;
/** Expert: returns the current refCount for this reader */
@ -228,7 +230,7 @@ public abstract class IndexReader implements Cloneable {
final Directory dir = FSDirectory.getDirectory(path);
IndexReader r = null;
try {
r = open(dir, null, null, readOnly);
r = open(dir, null, null, readOnly, DEFAULT_TERMS_INDEX_DIVISOR);
} finally {
if (r == null)
dir.close();
@ -265,7 +267,7 @@ public abstract class IndexReader implements Cloneable {
final Directory dir = FSDirectory.getDirectory(path);
IndexReader r = null;
try {
r = open(dir, null, null, readOnly);
r = open(dir, null, null, readOnly, DEFAULT_TERMS_INDEX_DIVISOR);
} finally {
if (r == null)
dir.close();
@ -282,7 +284,7 @@ public abstract class IndexReader implements Cloneable {
* Use {@link #open(Directory, boolean)} instead
*/
public static IndexReader open(final Directory directory) throws CorruptIndexException, IOException {
return open(directory, null, null, false);
return open(directory, null, null, false, DEFAULT_TERMS_INDEX_DIVISOR);
}
/** Returns an IndexReader reading the index in the given
@ -296,7 +298,7 @@ public abstract class IndexReader implements Cloneable {
* @throws IOException if there is a low-level IO error
*/
public static IndexReader open(final Directory directory, boolean readOnly) throws CorruptIndexException, IOException {
return open(directory, null, null, readOnly);
return open(directory, null, null, readOnly, DEFAULT_TERMS_INDEX_DIVISOR);
}
/** Expert: returns a read/write IndexReader reading the index in the given
@ -308,7 +310,7 @@ public abstract class IndexReader implements Cloneable {
* @throws IOException if there is a low-level IO error
*/
public static IndexReader open(final IndexCommit commit) throws CorruptIndexException, IOException {
return open(commit.getDirectory(), null, commit, false);
return open(commit.getDirectory(), null, commit, false, DEFAULT_TERMS_INDEX_DIVISOR);
}
/** Expert: returns an IndexReader reading the index in the given
@ -322,7 +324,7 @@ public abstract class IndexReader implements Cloneable {
* @throws IOException if there is a low-level IO error
*/
public static IndexReader open(final IndexCommit commit, boolean readOnly) throws CorruptIndexException, IOException {
return open(commit.getDirectory(), null, commit, readOnly);
return open(commit.getDirectory(), null, commit, readOnly, DEFAULT_TERMS_INDEX_DIVISOR);
}
/** Expert: returns a read/write IndexReader reading the index in the given
@ -337,7 +339,7 @@ public abstract class IndexReader implements Cloneable {
* @throws IOException if there is a low-level IO error
*/
public static IndexReader open(final Directory directory, IndexDeletionPolicy deletionPolicy) throws CorruptIndexException, IOException {
return open(directory, deletionPolicy, null, false);
return open(directory, deletionPolicy, null, false, DEFAULT_TERMS_INDEX_DIVISOR);
}
/** Expert: returns an IndexReader reading the index in
@ -355,7 +357,35 @@ public abstract class IndexReader implements Cloneable {
* @throws IOException if there is a low-level IO error
*/
public static IndexReader open(final Directory directory, IndexDeletionPolicy deletionPolicy, boolean readOnly) throws CorruptIndexException, IOException {
return open(directory, deletionPolicy, null, readOnly);
return open(directory, deletionPolicy, null, readOnly, DEFAULT_TERMS_INDEX_DIVISOR);
}
/** Expert: returns an IndexReader reading the index in
* the given Directory, with a custom {@link
* IndexDeletionPolicy}. You should pass readOnly=true,
* since it gives much better concurrent performance,
* unless you intend to do write operations (delete
* documents or change norms) with the reader.
* @param directory the index directory
* @param deletionPolicy a custom deletion policy (only used
* if you use this reader to perform deletes or to set
* norms); see {@link IndexWriter} for details.
* @param readOnly true if no changes (deletions, norms) will be made with this IndexReader
* @param termInfosIndexDivisor Subsamples which indexed
* terms are loaded into RAM. This has the same effect as {@link
* IndexWriter#setTermIndexInterval} except that setting
* must be done at indexing time while this setting can be
* set per reader. When set to N, then one in every
* N*termIndexInterval terms in the index is loaded into
* memory. By setting this to a value greater than 1 you can
* reduce memory usage, at the expense of higher latency when
* loading a TermInfo. The default value is 1. Set this
* to -1 to skip loading the terms index entirely.
* @return the reader opened by the internal five-argument
* open, which is called here with a null commit
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
*/
public static IndexReader open(final Directory directory, IndexDeletionPolicy deletionPolicy, boolean readOnly, int termInfosIndexDivisor) throws CorruptIndexException, IOException {
return open(directory, deletionPolicy, null, readOnly, termInfosIndexDivisor);
}
/** Expert: returns a read/write IndexReader reading the index in the given
@ -373,7 +403,7 @@ public abstract class IndexReader implements Cloneable {
* @throws IOException if there is a low-level IO error
*/
public static IndexReader open(final IndexCommit commit, IndexDeletionPolicy deletionPolicy) throws CorruptIndexException, IOException {
return open(commit.getDirectory(), deletionPolicy, commit, false);
return open(commit.getDirectory(), deletionPolicy, commit, false, DEFAULT_TERMS_INDEX_DIVISOR);
}
/** Expert: returns an IndexReader reading the index in
@ -393,11 +423,41 @@ public abstract class IndexReader implements Cloneable {
* @throws IOException if there is a low-level IO error
*/
public static IndexReader open(final IndexCommit commit, IndexDeletionPolicy deletionPolicy, boolean readOnly) throws CorruptIndexException, IOException {
return open(commit.getDirectory(), deletionPolicy, commit, readOnly);
return open(commit.getDirectory(), deletionPolicy, commit, readOnly, DEFAULT_TERMS_INDEX_DIVISOR);
}
private static IndexReader open(final Directory directory, final IndexDeletionPolicy deletionPolicy, final IndexCommit commit, final boolean readOnly) throws CorruptIndexException, IOException {
return DirectoryReader.open(directory, deletionPolicy, commit, readOnly);
/** Expert: returns an IndexReader reading the index in
* the given Directory, using a specific commit and with
* a custom {@link IndexDeletionPolicy}. You should pass
* readOnly=true, since it gives much better concurrent
* performance, unless you intend to do write operations
* (delete documents or change norms) with the reader.
* @param commit the specific {@link IndexCommit} to open;
* see {@link IndexReader#listCommits} to list all commits
* in a directory
* @param deletionPolicy a custom deletion policy (only used
* if you use this reader to perform deletes or to set
* norms); see {@link IndexWriter} for details.
* @param readOnly true if no changes (deletions, norms) will be made with this IndexReader
* @param termInfosIndexDivisor Subsamples which indexed
* terms are loaded into RAM. This has the same effect as {@link
* IndexWriter#setTermIndexInterval} except that setting
* must be done at indexing time while this setting can be
* set per reader. When set to N, then one in every
* N*termIndexInterval terms in the index is loaded into
* memory. By setting this to a value greater than 1 you can
* reduce memory usage, at the expense of higher latency when
* loading a TermInfo. The default value is 1. Set this
* to -1 to skip loading the terms index entirely.
* @return the reader opened by the internal five-argument
* open, which is called here with the commit's own directory
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
*/
public static IndexReader open(final IndexCommit commit, IndexDeletionPolicy deletionPolicy, boolean readOnly, int termInfosIndexDivisor) throws CorruptIndexException, IOException {
return open(commit.getDirectory(), deletionPolicy, commit, readOnly, termInfosIndexDivisor);
}
// Common entry point that the public open(...) overloads funnel into; it
// forwards every argument, including termInfosIndexDivisor, unchanged to
// DirectoryReader.open.
private static IndexReader open(final Directory directory, final IndexDeletionPolicy deletionPolicy, final IndexCommit commit, final boolean readOnly, int termInfosIndexDivisor) throws CorruptIndexException, IOException {
return DirectoryReader.open(directory, deletionPolicy, commit, readOnly, termInfosIndexDivisor);
}
/**
@ -668,17 +728,20 @@ public abstract class IndexReader implements Cloneable {
* index is loaded. If the index is already loaded,
* an IllegalStateException is thrown.
* @throws IllegalStateException if the term index has already been loaded into memory
* @deprecated Please use {@link IndexReader#open(Directory, IndexDeletionPolicy, boolean, int)} to specify the required TermInfos index divisor instead.
*/
public void setTermInfosIndexDivisor(int indexDivisor) throws IllegalStateException {
throw new UnsupportedOperationException("This reader does not support this method.");
throw new UnsupportedOperationException("Please pass termInfosIndexDivisor up-front when opening IndexReader");
}
/** <p>For IndexReader implementations that use
* TermInfosReader to read terms, this returns the
* current indexDivisor.
* @see #setTermInfosIndexDivisor */
* @see #setTermInfosIndexDivisor
* @deprecated Please use {@link IndexReader#open(Directory, IndexDeletionPolicy, boolean, int)} to specify the required TermInfos index divisor instead.
*/
public int getTermInfosIndexDivisor() {
throw new UnsupportedOperationException("This reader does not support this method.");
throw new UnsupportedOperationException("Please pass termInfosIndexDivisor up-front when opening IndexReader");
}
/**

View File

@ -425,6 +425,23 @@ public class IndexWriter {
* @throws IOException
*/
public IndexReader getReader() throws IOException {
return getReader(IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);
}
/** Expert: like {@link #getReader}, except you can
* specify which termInfosIndexDivisor should be used for
* any newly opened readers.
* @param termInfosIndexDivisor Subsamples which indexed
* terms are loaded into RAM. This has the same effect as {@link
* IndexWriter#setTermIndexInterval} except that setting
* must be done at indexing time while this setting can be
* set per reader. When set to N, then one in every
* N*termIndexInterval terms in the index is loaded into
* memory. By setting this to a value > 1 you can reduce
* memory usage, at the expense of higher latency when
* loading a TermInfo. The default value is 1. Set this
* to -1 to skip loading the terms index entirely. */
public IndexReader getReader(int termInfosIndexDivisor) throws IOException {
if (infoStream != null) {
message("flush at getReader");
}
@ -440,7 +457,7 @@ public class IndexWriter {
// reader; in theory we could do similar retry logic,
// just like we do when loading segments_N
synchronized(this) {
return new ReadOnlyDirectoryReader(this, segmentInfos);
return new ReadOnlyDirectoryReader(this, segmentInfos, termInfosIndexDivisor);
}
}
@ -590,8 +607,8 @@ public class IndexWriter {
// Returns a ref to a clone. NOTE: this clone is not
// enrolled in the pool, so you should simply close()
// it when you're done (ie, do not call release()).
public synchronized SegmentReader getReadOnlyClone(SegmentInfo info, boolean doOpenStores) throws IOException {
SegmentReader sr = get(info, doOpenStores);
public synchronized SegmentReader getReadOnlyClone(SegmentInfo info, boolean doOpenStores, int termInfosIndexDivisor) throws IOException {
SegmentReader sr = get(info, doOpenStores, BufferedIndexInput.BUFFER_SIZE, termInfosIndexDivisor);
try {
return (SegmentReader) sr.clone(true);
} finally {
@ -601,10 +618,10 @@ public class IndexWriter {
// Returns a ref
public synchronized SegmentReader get(SegmentInfo info, boolean doOpenStores) throws IOException {
return get(info, doOpenStores, BufferedIndexInput.BUFFER_SIZE);
return get(info, doOpenStores, BufferedIndexInput.BUFFER_SIZE, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);
}
public synchronized SegmentReader get(SegmentInfo info, boolean doOpenStores, int readBufferSize) throws IOException {
public synchronized SegmentReader get(SegmentInfo info, boolean doOpenStores, int readBufferSize, int termsIndexDivisor) throws IOException {
if (poolReaders) {
readBufferSize = BufferedIndexInput.BUFFER_SIZE;
@ -615,10 +632,21 @@ public class IndexWriter {
// TODO: we may want to avoid doing this while
// synchronized
// Returns a ref, which we xfer to readerMap:
sr = SegmentReader.get(info, readBufferSize, doOpenStores);
sr = SegmentReader.get(info, readBufferSize, doOpenStores, termsIndexDivisor);
readerMap.put(info, sr);
} else if (doOpenStores) {
sr.openDocStores();
} else {
if (doOpenStores) {
sr.openDocStores();
}
if (termsIndexDivisor != -1 && !sr.termsIndexLoaded()) {
// If this reader was originally opened because we
// needed to merge it, we didn't load the terms
// index. But now, if the caller wants the terms
// index (eg because it's doing deletes, or an NRT
// reader is being opened) we ask the reader to
// load its terms index.
sr.loadTermsIndex(termsIndexDivisor);
}
}
// Return a ref to our caller
@ -3769,7 +3797,7 @@ public class IndexWriter {
SegmentReader sReader = null;
synchronized(this) {
if (segmentInfos.size() == 1) { // add existing index, if any
sReader = readerPool.get(segmentInfos.info(0), true);
sReader = readerPool.get(segmentInfos.info(0), true, BufferedIndexInput.BUFFER_SIZE, -1);
}
}
@ -4867,7 +4895,8 @@ public class IndexWriter {
// Hold onto the "live" reader; we will use this to
// commit merged deletes
SegmentReader reader = merge.readers[i] = readerPool.get(info, merge.mergeDocStores,
MERGE_READ_BUFFER_SIZE);
MERGE_READ_BUFFER_SIZE,
-1);
// We clone the segment readers because other
// deletes may come in while we're merging so we
@ -4923,7 +4952,7 @@ public class IndexWriter {
// keep deletes (it's costly to open entire reader
// when we just need deletes)
final SegmentReader mergedReader = readerPool.get(merge.info, false);
final SegmentReader mergedReader = readerPool.get(merge.info, false, BufferedIndexInput.BUFFER_SIZE, -1);
try {
if (poolReaders && mergedSegmentWarmer != null) {
mergedSegmentWarmer.warm(mergedReader);

View File

@ -23,16 +23,17 @@ import java.io.IOException;
import java.util.Map;
class ReadOnlyDirectoryReader extends DirectoryReader {
ReadOnlyDirectoryReader(Directory directory, SegmentInfos sis, IndexDeletionPolicy deletionPolicy) throws IOException {
super(directory, sis, deletionPolicy, true);
ReadOnlyDirectoryReader(Directory directory, SegmentInfos sis, IndexDeletionPolicy deletionPolicy, int termInfosIndexDivisor) throws IOException {
super(directory, sis, deletionPolicy, true, termInfosIndexDivisor);
}
ReadOnlyDirectoryReader(Directory directory, SegmentInfos infos, SegmentReader[] oldReaders, int[] oldStarts, Map oldNormsCache, boolean doClone) throws IOException {
super(directory, infos, oldReaders, oldStarts, oldNormsCache, true, doClone);
ReadOnlyDirectoryReader(Directory directory, SegmentInfos infos, SegmentReader[] oldReaders, int[] oldStarts, Map oldNormsCache, boolean doClone,
int termInfosIndexDivisor) throws IOException {
super(directory, infos, oldReaders, oldStarts, oldNormsCache, true, doClone, termInfosIndexDivisor);
}
ReadOnlyDirectoryReader(IndexWriter writer, SegmentInfos infos) throws IOException {
super(writer, infos);
ReadOnlyDirectoryReader(IndexWriter writer, SegmentInfos infos, int termInfosIndexDivisor) throws IOException {
super(writer, infos, termInfosIndexDivisor);
}
protected void acquireWriteLock() {

View File

@ -81,10 +81,12 @@ class SegmentReader extends IndexReader implements Cloneable {
final FieldInfos fieldInfos;
final IndexInput freqStream;
final IndexInput proxStream;
final TermInfosReader tisNoIndex;
final Directory dir;
final Directory cfsDir;
final int readBufferSize;
final int termsIndexDivisor;
TermInfosReader tis;
FieldsReader fieldsReaderOrig;
@ -92,7 +94,7 @@ class SegmentReader extends IndexReader implements Cloneable {
CompoundFileReader cfsReader;
CompoundFileReader storeCFSReader;
CoreReaders(Directory dir, SegmentInfo si, int readBufferSize) throws IOException {
CoreReaders(Directory dir, SegmentInfo si, int readBufferSize, int termsIndexDivisor) throws IOException {
segment = si.name;
this.readBufferSize = readBufferSize;
this.dir = dir;
@ -109,7 +111,14 @@ class SegmentReader extends IndexReader implements Cloneable {
fieldInfos = new FieldInfos(cfsDir, segment + "." + IndexFileNames.FIELD_INFOS_EXTENSION);
tis = new TermInfosReader(cfsDir, segment, fieldInfos, readBufferSize);
this.termsIndexDivisor = termsIndexDivisor;
TermInfosReader reader = new TermInfosReader(cfsDir, segment, fieldInfos, readBufferSize, termsIndexDivisor);
if (termsIndexDivisor == -1) {
tisNoIndex = reader;
} else {
tis = reader;
tisNoIndex = null;
}
// make sure that all index files have been read or are kept open
// so that if an index update removes them we'll still have them
@ -144,6 +153,43 @@ class SegmentReader extends IndexReader implements Cloneable {
return cfsReader;
}
// Hands back the terms reader to use for lookups: the one assigned to
// tis when it is set, otherwise the tisNoIndex reader.
synchronized TermInfosReader getTermsReader() {
  return (tis == null) ? tisNoIndex : tis;
}
// True once tis has been assigned, i.e. either the constructor was given
// a termsIndexDivisor other than -1 or loadTermsIndex has since run.
synchronized boolean termsIndexIsLoaded() {
return tis != null;
}
// Creates the terms reader (tis) with the given divisor if it has not
// been created yet; does nothing when tis is already set.
//
// NOTE: only called from IndexWriter when a near real-time reader is
// opened, or applyDeletes is run, sharing a segment that's still being
// merged. This method is not fully thread safe, and relies on the
// synchronization in IndexWriter.
synchronized void loadTermsIndex(SegmentInfo si, int termsIndexDivisor) throws IOException {
  if (tis != null) {
    // A terms reader is already in place; nothing to do.
    return;
  }

  final Directory termsDir;
  if (!si.getUseCompoundFile()) {
    termsDir = dir;
  } else {
    // In some cases we were originally opened when CFS was not used, but
    // by the time we are asked to open the terms reader with its index
    // the segment has switched to CFS, so the compound reader may still
    // need to be opened here.
    if (cfsReader == null) {
      cfsReader = new CompoundFileReader(dir, segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
    }
    termsDir = cfsReader;
  }

  tis = new TermInfosReader(termsDir, segment, fieldInfos, readBufferSize, termsIndexDivisor);
}
synchronized void decRef() throws IOException {
if (ref.decRef() == 0) {
@ -155,6 +201,10 @@ class SegmentReader extends IndexReader implements Cloneable {
tis = null;
}
if (tisNoIndex != null) {
tisNoIndex.close();
}
if (freqStream != null) {
freqStream.close();
}
@ -534,15 +584,15 @@ class SegmentReader extends IndexReader implements Cloneable {
* @deprecated
*/
public static SegmentReader get(SegmentInfo si) throws CorruptIndexException, IOException {
return get(false, si.dir, si, BufferedIndexInput.BUFFER_SIZE, true);
return get(false, si.dir, si, BufferedIndexInput.BUFFER_SIZE, true, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);
}
/**
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
*/
public static SegmentReader get(boolean readOnly, SegmentInfo si) throws CorruptIndexException, IOException {
return get(readOnly, si.dir, si, BufferedIndexInput.BUFFER_SIZE, true);
public static SegmentReader get(boolean readOnly, SegmentInfo si, int termInfosIndexDivisor) throws CorruptIndexException, IOException {
return get(readOnly, si.dir, si, BufferedIndexInput.BUFFER_SIZE, true, termInfosIndexDivisor);
}
/**
@ -550,8 +600,8 @@ class SegmentReader extends IndexReader implements Cloneable {
* @throws IOException if there is a low-level IO error
* @deprecated
*/
static SegmentReader get(SegmentInfo si, int readBufferSize, boolean doOpenStores) throws CorruptIndexException, IOException {
return get(false, si.dir, si, readBufferSize, doOpenStores);
static SegmentReader get(SegmentInfo si, int readBufferSize, boolean doOpenStores, int termInfosIndexDivisor) throws CorruptIndexException, IOException {
return get(false, si.dir, si, readBufferSize, doOpenStores, termInfosIndexDivisor);
}
/**
@ -562,7 +612,8 @@ class SegmentReader extends IndexReader implements Cloneable {
Directory dir,
SegmentInfo si,
int readBufferSize,
boolean doOpenStores)
boolean doOpenStores,
int termInfosIndexDivisor)
throws CorruptIndexException, IOException {
SegmentReader instance;
try {
@ -580,7 +631,7 @@ class SegmentReader extends IndexReader implements Cloneable {
boolean success = false;
try {
instance.core = new CoreReaders(dir, si, readBufferSize);
instance.core = new CoreReaders(dir, si, readBufferSize, termInfosIndexDivisor);
if (doOpenStores) {
instance.core.openDocStores(si);
}
@ -876,12 +927,12 @@ class SegmentReader extends IndexReader implements Cloneable {
public TermEnum terms() {
ensureOpen();
return core.tis.terms();
return core.getTermsReader().terms();
}
public TermEnum terms(Term t) throws IOException {
ensureOpen();
return core.tis.terms(t);
return core.getTermsReader().terms(t);
}
FieldInfos fieldInfos() {
@ -917,7 +968,7 @@ class SegmentReader extends IndexReader implements Cloneable {
public int docFreq(Term t) throws IOException {
ensureOpen();
TermInfo ti = core.tis.get(t);
TermInfo ti = core.getTermsReader().get(t);
if (ti != null)
return ti.docFreq;
else
@ -937,14 +988,6 @@ class SegmentReader extends IndexReader implements Cloneable {
return si.docCount;
}
public void setTermInfosIndexDivisor(int indexDivisor) throws IllegalStateException {
core.tis.setIndexDivisor(indexDivisor);
}
public int getTermInfosIndexDivisor() {
return core.tis.getIndexDivisor();
}
/**
* @see IndexReader#getFieldNames(IndexReader.FieldOption fldOption)
*/
@ -1099,6 +1142,19 @@ class SegmentReader extends IndexReader implements Cloneable {
}
}
// Reports whether the core readers currently have the terms index
// loaded; simply forwards to core.termsIndexIsLoaded().
boolean termsIndexLoaded() {
return core.termsIndexIsLoaded();
}
// Asks the core readers to load the terms index using the given divisor,
// passing along this reader's SegmentInfo (si).
//
// NOTE: only called from IndexWriter when a near
// real-time reader is opened, or applyDeletes is run,
// sharing a segment that's still being merged. This
// method is not thread safe, and relies on the
// synchronization in IndexWriter
void loadTermsIndex(int termsIndexDivisor) throws IOException {
core.loadTermsIndex(si, termsIndexDivisor);
}
// for testing only
boolean normsClosed() {
if (singleNormStream != null) {
@ -1268,9 +1324,16 @@ class SegmentReader extends IndexReader implements Cloneable {
}
public long getUniqueTermCount() {
return core.tis.size();
return core.getTermsReader().size();
}
/*
// nocommit
final TermInfosReader getTermInfosReader() {
return terms.getTermsReader();
}
*/
/**
* Lotsa tests did hacks like:<br/>
* SegmentReader reader = (SegmentReader) IndexReader.open(dir);<br/>

View File

@ -49,12 +49,12 @@ class SegmentTermDocs implements TermDocs {
synchronized (parent) {
this.deletedDocs = parent.deletedDocs;
}
this.skipInterval = parent.core.tis.getSkipInterval();
this.maxSkipLevels = parent.core.tis.getMaxSkipLevels();
this.skipInterval = parent.core.getTermsReader().getSkipInterval();
this.maxSkipLevels = parent.core.getTermsReader().getMaxSkipLevels();
}
public void seek(Term term) throws IOException {
TermInfo ti = parent.core.tis.get(term);
TermInfo ti = parent.core.getTermsReader().get(term);
seek(ti, term);
}
@ -69,7 +69,7 @@ class SegmentTermDocs implements TermDocs {
ti = segmentTermEnum.termInfo();
} else { // punt case
term = termEnum.term();
ti = parent.core.tis.get(term);
ti = parent.core.getTermsReader().get(term);
}
seek(ti, term);

View File

@ -20,7 +20,6 @@ package org.apache.lucene.index;
import java.io.IOException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.BufferedIndexInput;
import org.apache.lucene.util.cache.Cache;
import org.apache.lucene.util.cache.SimpleLRUCache;
import org.apache.lucene.util.CloseableThreadLocal;
@ -30,22 +29,19 @@ import org.apache.lucene.util.CloseableThreadLocal;
* set. */
final class TermInfosReader {
private Directory directory;
private String segment;
private FieldInfos fieldInfos;
private final Directory directory;
private final String segment;
private final FieldInfos fieldInfos;
private CloseableThreadLocal threadResources = new CloseableThreadLocal();
private SegmentTermEnum origEnum;
private long size;
private final CloseableThreadLocal threadResources = new CloseableThreadLocal();
private final SegmentTermEnum origEnum;
private final long size;
private Term[] indexTerms = null;
private TermInfo[] indexInfos;
private long[] indexPointers;
private final Term[] indexTerms;
private final TermInfo[] indexInfos;
private final long[] indexPointers;
private SegmentTermEnum indexEnum;
private int indexDivisor = 1;
private int totalIndexInterval;
private final int totalIndexInterval;
private final static int DEFAULT_CACHE_SIZE = 1024;
@ -59,15 +55,14 @@ final class TermInfosReader {
Cache termInfoCache;
}
/**
 * Convenience constructor: delegates to the four-argument constructor,
 * passing BufferedIndexInput.BUFFER_SIZE as the read buffer size.
 */
TermInfosReader(Directory dir, String seg, FieldInfos fis)
throws CorruptIndexException, IOException {
this(dir, seg, fis, BufferedIndexInput.BUFFER_SIZE);
}
TermInfosReader(Directory dir, String seg, FieldInfos fis, int readBufferSize)
TermInfosReader(Directory dir, String seg, FieldInfos fis, int readBufferSize, int indexDivisor)
throws CorruptIndexException, IOException {
boolean success = false;
if (indexDivisor < 1 && indexDivisor != -1) {
throw new IllegalArgumentException("indexDivisor must be -1 (don't load terms index) or greater than 0: got " + indexDivisor);
}
try {
directory = dir;
segment = seg;
@ -76,11 +71,40 @@ final class TermInfosReader {
origEnum = new SegmentTermEnum(directory.openInput(segment + "." + IndexFileNames.TERMS_EXTENSION,
readBufferSize), fieldInfos, false);
size = origEnum.size;
totalIndexInterval = origEnum.indexInterval;
indexEnum = new SegmentTermEnum(directory.openInput(segment + "." + IndexFileNames.TERMS_INDEX_EXTENSION,
readBufferSize), fieldInfos, true);
if (indexDivisor != -1) {
// Load terms index
totalIndexInterval = origEnum.indexInterval * indexDivisor;
final SegmentTermEnum indexEnum = new SegmentTermEnum(directory.openInput(segment + "." + IndexFileNames.TERMS_INDEX_EXTENSION,
readBufferSize), fieldInfos, true);
try {
int indexSize = 1+((int)indexEnum.size-1)/indexDivisor; // otherwise read index
indexTerms = new Term[indexSize];
indexInfos = new TermInfo[indexSize];
indexPointers = new long[indexSize];
for (int i = 0; indexEnum.next(); i++) {
indexTerms[i] = indexEnum.term();
indexInfos[i] = indexEnum.termInfo();
indexPointers[i] = indexEnum.indexPointer;
for (int j = 1; j < indexDivisor; j++)
if (!indexEnum.next())
break;
}
} finally {
indexEnum.close();
}
} else {
// Do not load terms index:
totalIndexInterval = -1;
indexTerms = null;
indexInfos = null;
indexPointers = null;
}
success = true;
} finally {
// With lock-less commits, it's entirely possible (and
@ -102,48 +126,9 @@ final class TermInfosReader {
return origEnum.maxSkipLevels;
}
/**
 * <p>Sets the indexDivisor, which subsamples the number of indexed terms
 * loaded into memory. The effect is similar to
 * {@link IndexWriter#setTermIndexInterval}, except that that setting must be
 * chosen at indexing time while this one can be chosen per reader. With a
 * value of N, one in every N*termIndexInterval terms in the index is loaded
 * into memory. Values &gt; 1 reduce memory usage at the expense of higher
 * latency when loading a TermInfo. The default value is 1.</p>
 *
 * <b>NOTE:</b> this must be called before the term index is loaded.
 *
 * @param indexDivisor the subsampling factor; must be at least 1
 * @throws IllegalArgumentException if indexDivisor is less than 1
 * @throws IllegalStateException if the term index has already been loaded
 *         into memory
 */
public void setIndexDivisor(int indexDivisor) throws IllegalStateException {
  // Validate the argument before looking at reader state.
  if (indexDivisor < 1) {
    throw new IllegalArgumentException("indexDivisor must be > 0: got " + indexDivisor);
  }

  // Once the index terms are in memory the divisor can no longer change.
  final boolean alreadyLoaded = indexTerms != null;
  if (alreadyLoaded) {
    throw new IllegalStateException("index terms are already loaded");
  }

  this.indexDivisor = indexDivisor;
  // Effective spacing between loaded index entries.
  totalIndexInterval = origEnum.indexInterval * indexDivisor;
}
/**
 * Returns the current indexDivisor.
 *
 * @return the value stored in the indexDivisor field
 * @see #setIndexDivisor
 */
public int getIndexDivisor() {
  return this.indexDivisor;
}
/**
 * Closes origEnum and indexEnum when they are non-null, then closes the
 * per-thread resources held in threadResources.
 *
 * @throws IOException if closing one of the enumerators fails
 */
final void close() throws IOException {
if (origEnum != null)
origEnum.close();
if (indexEnum != null)
indexEnum.close();
threadResources.close();
}
@ -164,30 +149,6 @@ final class TermInfosReader {
return resources;
}
/**
 * Loads the terms index into the indexTerms / indexInfos / indexPointers
 * arrays if that has not happened yet, keeping one entry out of every
 * indexDivisor entries. The index enumerator is closed and cleared
 * afterwards, whether or not loading succeeded.
 *
 * @throws IOException if reading from the index enumerator fails
 */
private synchronized void ensureIndexIsRead() throws IOException {
  if (indexTerms != null) {
    // Index already read: nothing to do.
    return;
  }

  try {
    final int indexSize = 1 + ((int) indexEnum.size - 1) / indexDivisor;
    indexTerms = new Term[indexSize];
    indexInfos = new TermInfo[indexSize];
    indexPointers = new long[indexSize];

    int slot = 0;
    while (indexEnum.next()) {
      indexTerms[slot] = indexEnum.term();
      indexInfos[slot] = indexEnum.termInfo();
      indexPointers[slot] = indexEnum.indexPointer;
      slot++;

      // Step over the next indexDivisor-1 entries, stopping at the first
      // failed advance.
      int consumed = 1;
      while (consumed < indexDivisor && indexEnum.next()) {
        consumed++;
      }
    }
  } finally {
    indexEnum.close();
    indexEnum = null;
  }
}
/** Returns the offset of the greatest index entry which is less than or equal to term.*/
private final int getIndexOffset(Term term) {
@ -223,7 +184,7 @@ final class TermInfosReader {
if (size == 0) return null;
ensureIndexIsRead();
TermInfo ti;
ThreadResources resources = getThreadResources();
Cache cache = null;
@ -302,6 +263,12 @@ final class TermInfosReader {
return enumerator.term();
}
/**
 * Verifies that the terms index is available.
 *
 * @throws IllegalStateException if indexTerms is null, i.e. the terms index
 *         was not loaded when this reader was created
 */
private void ensureIndexIsRead() {
  if (indexTerms != null) {
    return;
  }
  throw new IllegalStateException("terms index was not loaded when this reader was created");
}
/** Returns the position of a Term in the set or -1. */
final long getPosition(Term term) throws IOException {
if (size == 0) return -1;

View File

@ -1800,4 +1800,40 @@ public class TestIndexReader extends LuceneTestCase
writer.close();
dir.close();
}
// LUCENE-1609: a reader opened with -1 as the terms index divisor must not
// load the terms index, and (LUCENE-1718) a reopened reader must keep that.
public void testNoTermsIndex() throws Throwable {
  final Directory dir = new MockRAMDirectory();

  // Index the same two-field document twice.
  IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
  final Document doc = new Document();
  doc.add(new Field("field", "a b c d e f g h i j k l m n o p q r s t u v w x y z", Field.Store.NO, Field.Index.ANALYZED));
  doc.add(new Field("number", "0 1 2 3 4 5 6 7 8 9", Field.Store.NO, Field.Index.ANALYZED));
  writer.addDocument(doc);
  writer.addDocument(doc);
  writer.close();

  final IndexReader reader = IndexReader.open(dir, null, true, -1);

  // A term lookup needs the terms index, so it must be rejected.
  boolean rejected = false;
  try {
    reader.docFreq(new Term("field", "f"));
  } catch (IllegalStateException expected) {
    rejected = true;
  }
  if (!rejected) {
    fail("did not hit expected exception");
  }

  final SegmentReader firstSegment = (SegmentReader) reader.getSequentialSubReaders()[0];
  assertFalse(firstSegment.termsIndexLoaded());

  // Add another document so that the reopened reader sees a second sub-reader.
  writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
  writer.addDocument(doc);
  writer.close();

  // LUCENE-1718: ensure re-open carries over no terms index:
  final IndexReader reopened = reader.reopen();
  reader.close();
  final IndexReader[] subReaders = reopened.getSequentialSubReaders();
  assertEquals(2, subReaders.length);
  for (int i = 0; i < subReaders.length; i++) {
    final SegmentReader segment = (SegmentReader) subReaders[i];
    assertFalse(segment.termsIndexLoaded());
  }

  reopened.close();
  dir.close();
}
}

View File

@ -28,7 +28,6 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.store.MockRAMDirectory;
public class TestSegmentReader extends LuceneTestCase {
private RAMDirectory dir = new RAMDirectory();
@ -203,20 +202,4 @@ public class TestSegmentReader extends LuceneTestCase {
assertTrue(results != null);
assertTrue("We do not have 4 term freq vectors, we have: " + results.length, results.length == 4);
}
/**
 * Rebuilds the fixture on a MockRAMDirectory, sets a terms index divisor of 3
 * on the reader, and re-runs the other tests of this class against it.
 */
public void testIndexDivisor() throws IOException {
  // Fresh directory and document for this run.
  dir = new MockRAMDirectory();
  testDoc = new Document();
  DocHelper.setupDoc(testDoc);

  final SegmentInfo writtenSegment = DocHelper.writeDoc(dir, testDoc);
  reader = SegmentReader.get(writtenSegment);
  reader.setTermInfosIndexDivisor(3);

  // Exercise the reader with the divisor in place.
  testDocument();
  testDelete();
  testGetFieldNameVariations();
  testNorms();
  testTerms();
  testTermVectors();
}
}

View File

@ -26,7 +26,6 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import java.io.IOException;
import org.apache.lucene.search.Similarity;
public class TestSegmentTermDocs extends LuceneTestCase {
private Document testDoc = new Document();
@ -53,8 +52,7 @@ public class TestSegmentTermDocs extends LuceneTestCase {
public void testTermDocs(int indexDivisor) throws IOException {
//After adding the document, we should be able to read it back in
SegmentReader reader = SegmentReader.get(info);
reader.setTermInfosIndexDivisor(indexDivisor);
SegmentReader reader = SegmentReader.get(true, info, indexDivisor);
assertTrue(reader != null);
SegmentTermDocs segTermDocs = new SegmentTermDocs(reader);
assertTrue(segTermDocs != null);
@ -76,8 +74,7 @@ public class TestSegmentTermDocs extends LuceneTestCase {
public void testBadSeek(int indexDivisor) throws IOException {
{
//After adding the document, we should be able to read it back in
SegmentReader reader = SegmentReader.get(info);
reader.setTermInfosIndexDivisor(indexDivisor);
SegmentReader reader = SegmentReader.get(true, info, indexDivisor);
assertTrue(reader != null);
SegmentTermDocs segTermDocs = new SegmentTermDocs(reader);
assertTrue(segTermDocs != null);
@ -87,8 +84,7 @@ public class TestSegmentTermDocs extends LuceneTestCase {
}
{
//After adding the document, we should be able to read it back in
SegmentReader reader = SegmentReader.get(info);
reader.setTermInfosIndexDivisor(indexDivisor);
SegmentReader reader = SegmentReader.get(true, info, indexDivisor);
assertTrue(reader != null);
SegmentTermDocs segTermDocs = new SegmentTermDocs(reader);
assertTrue(segTermDocs != null);
@ -123,9 +119,7 @@ public class TestSegmentTermDocs extends LuceneTestCase {
writer.optimize();
writer.close();
IndexReader reader = IndexReader.open(dir);
reader.setTermInfosIndexDivisor(indexDivisor);
assertEquals(indexDivisor, reader.getTermInfosIndexDivisor());
IndexReader reader = IndexReader.open(dir, null, true, indexDivisor);
TermDocs tdocs = reader.termDocs();
@ -239,21 +233,6 @@ public class TestSegmentTermDocs extends LuceneTestCase {
testBadSeek(2);
testSkipTo(2);
}
/**
 * Verifies that setTermInfosIndexDivisor throws IllegalStateException when it
 * is called after a docFreq lookup has already been made on the reader.
 */
public void testIndexDivisorAfterLoad() throws IOException {
  dir = new MockRAMDirectory();
  testDoc = new Document();
  DocHelper.setupDoc(testDoc);
  final SegmentInfo writtenSegment = DocHelper.writeDoc(dir, testDoc);
  final SegmentReader reader = SegmentReader.get(writtenSegment);

  // Perform a term lookup before attempting to change the divisor.
  final int keywordDocFreq = reader.docFreq(new Term("keyField", "Keyword"));
  assertEquals(1, keywordDocFreq);

  boolean rejected = false;
  try {
    reader.setTermInfosIndexDivisor(2);
  } catch (IllegalStateException expected) {
    rejected = true;
  }
  if (!rejected) {
    fail("did not hit IllegalStateException exception");
  }
}
private void addDoc(IndexWriter writer, String value) throws IOException
{