LUCENE-2900: allow explicit control over whether deletes must be applied when pulling NRT reader

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1065337 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2011-01-30 18:06:37 +00:00
parent 9af1a72569
commit 277dfa0e88
20 changed files with 66 additions and 39 deletions

View File

@ -635,6 +635,13 @@ API Changes
it should keep it itself. Fixed Scorers to pass their parent Weight, so that
Scorer.visitSubScorers (LUCENE-2590) will work correctly.
(Robert Muir, Doron Cohen)
* LUCENE-2900: When opening a near-real-time (NRT) reader
(IndexReader.re/open(IndexWriter)) you can now specify whether
deletes should be applied. Applying deletes can be costly, and some
expert use cases can handle seeing deleted documents returned. The
deletes remain buffered so that the next time you open an NRT reader
and pass true, all deletes will be applied. (Mike McCandless)
Bug fixes

View File

@ -1277,7 +1277,7 @@ public class TestQPHelper extends LuceneTestCase {
Document doc = new Document();
doc.add(newField("field", "", Field.Store.NO, Field.Index.ANALYZED));
w.addDocument(doc);
IndexReader r = IndexReader.open(w);
IndexReader r = IndexReader.open(w, true);
IndexSearcher s = new IndexSearcher(r);
Query q = new StandardQueryParser(new CannedAnalyzer()).parse("\"a\"", "field");

View File

@ -99,7 +99,7 @@ public class TestDistance extends LuceneTestCase {
public void testLatLongFilterOnDeletedDocs() throws Exception {
writer.deleteDocuments(new Term("name", "Potomac"));
IndexReader r = IndexReader.open(writer);
IndexReader r = IndexReader.open(writer, true);
LatLongDistanceFilter f = new LatLongDistanceFilter(new QueryWrapperFilter(new MatchAllDocsQuery()),
lat, lng, 1.0, latField, lngField);

View File

@ -70,6 +70,8 @@ class DirectoryReader extends IndexReader implements Cloneable {
// opened on a past IndexCommit:
private long maxIndexVersion;
private final boolean applyAllDeletes;
// static IndexReader open(final Directory directory, final IndexDeletionPolicy deletionPolicy, final IndexCommit commit, final boolean readOnly,
// final int termInfosIndexDivisor) throws CorruptIndexException, IOException {
// return open(directory, deletionPolicy, commit, readOnly, termInfosIndexDivisor, null);
@ -107,6 +109,7 @@ class DirectoryReader extends IndexReader implements Cloneable {
this.codecs = codecs;
}
readerFinishedListeners = new MapBackedSet<ReaderFinishedListener>(new ConcurrentHashMap<ReaderFinishedListener,Boolean>());
applyAllDeletes = false;
// To reduce the chance of hitting FileNotFound
// (and having to retry), we open segments in
@ -138,9 +141,11 @@ class DirectoryReader extends IndexReader implements Cloneable {
}
// Used by near real-time search
DirectoryReader(IndexWriter writer, SegmentInfos infos, int termInfosIndexDivisor, CodecProvider codecs) throws IOException {
DirectoryReader(IndexWriter writer, SegmentInfos infos, int termInfosIndexDivisor, CodecProvider codecs, boolean applyAllDeletes) throws IOException {
this.directory = writer.getDirectory();
this.readOnly = true;
this.applyAllDeletes = applyAllDeletes; // saved for reopen
segmentInfos = (SegmentInfos) infos.clone();// make sure we clone otherwise we share mutable state with IW
this.termInfosIndexDivisor = termInfosIndexDivisor;
if (codecs == null) {
@ -193,6 +198,7 @@ class DirectoryReader extends IndexReader implements Cloneable {
this.segmentInfos = infos;
this.termInfosIndexDivisor = termInfosIndexDivisor;
this.readerFinishedListeners = readerFinishedListeners;
applyAllDeletes = false;
if (codecs == null) {
this.codecs = CodecProvider.getDefault();
@ -401,7 +407,7 @@ class DirectoryReader extends IndexReader implements Cloneable {
// TODO: right now we *always* make a new reader; in
// the future we could have write make some effort to
// detect that no changes have occurred
IndexReader reader = writer.getReader();
IndexReader reader = writer.getReader(applyAllDeletes);
reader.readerFinishedListeners = readerFinishedListeners;
return reader;
}

View File

@ -295,24 +295,26 @@ public abstract class IndexReader implements Cloneable,Closeable {
/**
* Open a near real time IndexReader from the {@link org.apache.lucene.index.IndexWriter}.
*
*
* @param writer The IndexWriter to open from
* @param applyAllDeletes If true, all buffered deletes will
* be applied (made visible) in the returned reader. If
* false, the deletes are not applied but remain buffered
* (in IndexWriter) so that they will be applied in the
* future. Applying deletes can be costly, so if your app
* can tolerate deleted documents being returned you might
* gain some performance by passing false.
* @return The new IndexReader
* @throws CorruptIndexException
* @throws IOException if there is a low-level IO error
*
* @see #reopen(IndexWriter)
* @see #reopen(IndexWriter,boolean)
*
* @lucene.experimental
*/
public static IndexReader open(final IndexWriter writer) throws CorruptIndexException, IOException {
return writer.getReader();
public static IndexReader open(final IndexWriter writer, boolean applyAllDeletes) throws CorruptIndexException, IOException {
return writer.getReader(applyAllDeletes);
}
/** Expert: returns an IndexReader reading the index in the given
* {@link IndexCommit}. You should pass readOnly=true, since it
* gives much better concurrent performance, unless you
@ -617,18 +619,26 @@ public abstract class IndexReader implements Cloneable,Closeable {
* if you attempt to reopen any of those readers, you'll
* hit an {@link AlreadyClosedException}.</p>
*
* @lucene.experimental
*
* @return IndexReader that covers entire index plus all
* changes made so far by this IndexWriter instance
*
* @param writer The IndexWriter to open from
* @param applyAllDeletes If true, all buffered deletes will
* be applied (made visible) in the returned reader. If
* false, the deletes are not applied but remain buffered
* (in IndexWriter) so that they will be applied in the
* future. Applying deletes can be costly, so if your app
* can tolerate deleted documents being returned you might
* gain some performance by passing false.
*
* @throws IOException
*
* @lucene.experimental
*/
public IndexReader reopen(IndexWriter writer) throws CorruptIndexException, IOException {
return writer.getReader();
public IndexReader reopen(IndexWriter writer, boolean applyAllDeletes) throws CorruptIndexException, IOException {
return writer.getReader(applyAllDeletes);
}
/**
* Efficiently clones the IndexReader (sharing most
* internal state).

View File

@ -274,6 +274,10 @@ public class IndexWriter implements Closeable {
// for testing
boolean anyNonBulkMerges;
IndexReader getReader() throws IOException {
return getReader(true);
}
/**
* Expert: returns a readonly reader, covering all
* committed as well as un-committed changes to the index.
@ -333,7 +337,7 @@ public class IndexWriter implements Closeable {
*
* @throws IOException
*/
IndexReader getReader() throws IOException {
IndexReader getReader(boolean applyAllDeletes) throws IOException {
ensureOpen();
final long tStart = System.currentTimeMillis();
@ -352,8 +356,8 @@ public class IndexWriter implements Closeable {
// just like we do when loading segments_N
IndexReader r;
synchronized(this) {
flush(false, true);
r = new DirectoryReader(this, segmentInfos, config.getReaderTermsIndexDivisor(), codecs);
flush(false, applyAllDeletes);
r = new DirectoryReader(this, segmentInfos, config.getReaderTermsIndexDivisor(), codecs, applyAllDeletes);
if (infoStream != null) {
message("return reader version=" + r.getVersion() + " reader=" + r);
}
@ -2463,9 +2467,9 @@ public class IndexWriter implements Closeable {
* to the Directory.
* @param triggerMerge if true, we may merge segments (if
* deletes or docs were flushed) if necessary
* @param flushDeletes whether pending deletes should also
* @param applyAllDeletes whether pending deletes should also
*/
protected final void flush(boolean triggerMerge, boolean flushDeletes) throws CorruptIndexException, IOException {
protected final void flush(boolean triggerMerge, boolean applyAllDeletes) throws CorruptIndexException, IOException {
// NOTE: this method cannot be sync'd because
// maybeMerge() in turn calls mergeScheduler.merge which
@ -2476,7 +2480,7 @@ public class IndexWriter implements Closeable {
// We can be called during close, when closing==true, so we must pass false to ensureOpen:
ensureOpen(false);
if (doFlush(flushDeletes) && triggerMerge) {
if (doFlush(applyAllDeletes) && triggerMerge) {
maybeMerge();
}
}

View File

@ -660,7 +660,7 @@ public class TestExternalCodecs extends LuceneTestCase {
}
w.deleteDocuments(new Term("id", "77"));
IndexReader r = IndexReader.open(w);
IndexReader r = IndexReader.open(w, true);
IndexReader[] subs = r.getSequentialSubReaders();
// test each segment
for(int i=0;i<subs.length;i++) {
@ -678,7 +678,7 @@ public class TestExternalCodecs extends LuceneTestCase {
w.deleteDocuments(new Term("id", "44"));
w.optimize();
r = IndexReader.open(w);
r = IndexReader.open(w, true);
assertEquals(NUM_DOCS-2, r.maxDoc());
assertEquals(NUM_DOCS-2, r.numDocs());
s = new IndexSearcher(r);

View File

@ -1010,7 +1010,7 @@ public class TestIndexWriterReader extends LuceneTestCase {
Document doc = new Document();
doc.add(new Field("f", "val", Store.NO, Index.ANALYZED));
w.addDocument(doc);
IndexReader r = IndexReader.open(w).getSequentialSubReaders()[0];
IndexReader r = IndexReader.open(w, true).getSequentialSubReaders()[0];
try {
r.termDocsEnum(null, "f", new BytesRef("val"));
fail("should have failed to seek since terms index was not loaded. Codec used " + conf.getCodecProvider().getFieldCodec("f"));

View File

@ -206,7 +206,7 @@ public class TestNRTThreads extends LuceneTestCase {
// let index build up a bit
Thread.sleep(100);
IndexReader r = IndexReader.open(writer);
IndexReader r = IndexReader.open(writer, true);
boolean any = false;
// silly starting guess:
@ -239,7 +239,7 @@ public class TestNRTThreads extends LuceneTestCase {
if (VERBOSE) {
System.out.println("TEST: now open");
}
r = IndexReader.open(writer);
r = IndexReader.open(writer, true);
}
if (VERBOSE) {
System.out.println("TEST: got new reader=" + r);

View File

@ -1140,7 +1140,7 @@ public class TestQueryParser extends LuceneTestCase {
Document doc = new Document();
doc.add(newField("f", "the wizard of ozzy", Field.Store.NO, Field.Index.ANALYZED));
w.addDocument(doc);
IndexReader r = IndexReader.open(w);
IndexReader r = IndexReader.open(w, true);
w.close();
IndexSearcher s = new IndexSearcher(r);
QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "f", a);

View File

@ -47,7 +47,7 @@ public class TestCachingSpanFilter extends LuceneTestCase {
// flipping a coin) may give us a newly opened reader,
// but we use .reopen on this reader below and expect to
// (must) get an NRT reader:
IndexReader reader = IndexReader.open(writer.w);
IndexReader reader = IndexReader.open(writer.w, true);
IndexSearcher searcher = new IndexSearcher(reader);
// add a doc, refresh the reader, and check that its there

View File

@ -169,7 +169,7 @@ public class TestCachingWrapperFilter extends LuceneTestCase {
// flipping a coin) may give us a newly opened reader,
// but we use .reopen on this reader below and expect to
// (must) get an NRT reader:
IndexReader reader = IndexReader.open(writer.w);
IndexReader reader = IndexReader.open(writer.w, true);
IndexSearcher searcher = new IndexSearcher(reader);
// add a doc, refresh the reader, and check that its there

View File

@ -50,7 +50,7 @@ public class TestElevationComparator extends LuceneTestCase {
writer.addDocument(adoc(new String[] {"id", "y", "title", "boosted boosted", "str_s","y"}));
writer.addDocument(adoc(new String[] {"id", "z", "title", "boosted boosted boosted","str_s", "z"}));
IndexReader r = IndexReader.open(writer);
IndexReader r = IndexReader.open(writer, true);
writer.close();
IndexSearcher searcher = new IndexSearcher(r);

View File

@ -214,7 +214,7 @@ public class TestFieldCache extends LuceneTestCase {
public void testEmptyIndex() throws Exception {
Directory dir = newDirectory();
IndexWriter writer= new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()).setMaxBufferedDocs(500));
IndexReader r = IndexReader.open(writer);
IndexReader r = IndexReader.open(writer, true);
FieldCache.DocTerms terms = FieldCache.DEFAULT.getTerms(r, "foobar");
FieldCache.DocTermsIndex termsIndex = FieldCache.DEFAULT.getTermsIndex(r, "foobar");
r.close();

View File

@ -1059,7 +1059,7 @@ public class TestSort extends LuceneTestCase implements Serializable {
doc.add(newField("t", "1", Field.Store.NO, Field.Index.NOT_ANALYZED));
w.addDocument(doc);
IndexReader r = IndexReader.open(w);
IndexReader r = IndexReader.open(w, true);
w.close();
IndexSearcher s = new IndexSearcher(r);
TopDocs hits = s.search(new TermQuery(new Term("t", "1")), null, 10, new Sort(new SortField("f", SortField.STRING)));

View File

@ -40,7 +40,7 @@ public class TestValueSource extends LuceneTestCase {
w.commit();
}
IndexReader r = IndexReader.open(w);
IndexReader r = IndexReader.open(w, true);
w.close();
assertTrue(r.getSequentialSubReaders().length > 1);

View File

@ -126,7 +126,7 @@ public class PayloadHelper {
doc.add(new Field(NO_PAYLOAD_FIELD, English.intToEnglish(i), Field.Store.YES, Field.Index.ANALYZED));
writer.addDocument(doc);
}
reader = IndexReader.open(writer);
reader = IndexReader.open(writer, true);
writer.close();
IndexSearcher searcher = new IndexSearcher(reader);

View File

@ -49,7 +49,7 @@ public class TestFileSwitchDirectory extends LuceneTestCase {
setMergePolicy(newLogMergePolicy(false))
);
TestIndexWriterReader.createIndexNoClose(true, "ram", writer);
IndexReader reader = IndexReader.open(writer);
IndexReader reader = IndexReader.open(writer, true);
assertEquals(100, reader.maxDoc());
writer.commit();
// we should see only fdx,fdt files here

View File

@ -960,7 +960,7 @@ public class TestFSTs extends LuceneTestCase {
writer.addDocument(doc);
docCount++;
}
IndexReader r = IndexReader.open(writer);
IndexReader r = IndexReader.open(writer, true);
writer.close();
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(random.nextBoolean());
Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, 0, 0, true, outputs);

View File

@ -59,7 +59,7 @@ public class NearRealtimeReaderTask extends PerfTask {
}
long t = System.currentTimeMillis();
IndexReader r = IndexReader.open(w);
IndexReader r = IndexReader.open(w, true);
runData.setIndexReader(r);
// Transfer our reference to runData
r.decRef();