LUCENE-4203: add IndexWriter.tryDeleteDocument

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1368745 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2012-08-02 22:46:27 +00:00
parent 4e099f3571
commit 21e7728e57
6 changed files with 143 additions and 7 deletions

View File

@@ -43,6 +43,11 @@ New features
implementations to optimize the enum implementation. (Robert Muir,
Mike McCandless)
* LUCENE-4203: Add IndexWriter.tryDeleteDocument(AtomicReader reader,
int docID), to attempt deletion by docID as long as the provided
reader is an NRT reader, and the segment has not yet been merged
away (Mike McCandless).
API Changes
* LUCENE-4138: update of morfologik (Polish morphological analyzer) to 1.5.3.

View File

@@ -1,8 +1,5 @@
package org.apache.lucene.index;
import java.util.Collections;
import java.util.List;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -20,6 +17,9 @@ import java.util.List;
* limitations under the License.
*/
import java.util.Collections;
import java.util.List;
/**
* {@link IndexReaderContext} for {@link AtomicReader} instances
* @lucene.experimental
@@ -51,8 +51,9 @@ public final class AtomicReaderContext extends IndexReaderContext {
@Override
public List<AtomicReaderContext> leaves() {
if (!isTopLevel)
if (!isTopLevel) {
throw new UnsupportedOperationException("This is not a top-level context.");
}
assert leaves != null;
return leaves;
}

View File

@@ -1,7 +1,5 @@
package org.apache.lucene.index;
import java.util.List;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -19,6 +17,8 @@ import java.util.List;
* limitations under the License.
*/
import java.util.List;
/**
* A struct like class that represents a hierarchical relationship between
* {@link IndexReader} instances.

View File

@@ -1241,6 +1241,78 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
}
}
/** Expert: attempts to delete by document ID, as long as
 *  the provided reader is a near-real-time reader (from {@link
 *  DirectoryReader#open(IndexWriter,boolean)}).  If the
 *  provided reader is an NRT reader obtained from this
 *  writer, and its segment has not been merged away, then
 *  the delete succeeds and this method returns true; else, it
 *  returns false; the caller must then separately delete by
 *  Term or Query.
 *
 *  <b>NOTE</b>: this method can only delete documents
 *  visible to the currently open NRT reader.  If you need
 *  to delete documents indexed after opening the NRT
 *  reader you must use the other deleteDocument methods
 *  (e.g., {@link #deleteDocuments(Term)}). */
public synchronized boolean tryDeleteDocument(IndexReader readerIn, int docID) throws IOException {

  final AtomicReader reader;
  if (readerIn instanceof AtomicReader) {
    // Reader is already atomic: use the incoming docID:
    reader = (AtomicReader) readerIn;
  } else {
    // Composite reader: lookup sub-reader and re-base docID:
    List<AtomicReaderContext> leaves = readerIn.getTopReaderContext().leaves();
    int subIndex = ReaderUtil.subIndex(docID, leaves);
    reader = leaves.get(subIndex).reader();
    docID -= leaves.get(subIndex).docBase;
    assert docID >= 0;
    assert docID < reader.maxDoc();
  }

  // Only SegmentReaders carry the SegmentInfoPerCommit we need to
  // locate the live segment inside this writer:
  if (!(reader instanceof SegmentReader)) {
    throw new IllegalArgumentException("the reader must be a SegmentReader or composite reader containing only SegmentReaders");
  }

  final SegmentInfoPerCommit info = ((SegmentReader) reader).getSegmentInfo();

  // TODO: this is a slow linear search, but, number of
  // segments should be contained unless something is
  // seriously wrong w/ the index, so it should be a minor
  // cost:
  if (segmentInfos.indexOf(info) != -1) {
    // Segment is still live in this writer; try to find its pooled
    // reader so we can mutate its live docs in place:
    ReadersAndLiveDocs rld = readerPool.get(info, false);
    if (rld != null) {
      // NOTE(review): presumably syncing on bufferedDeletesStream keeps
      // this liveDocs mutation atomic w.r.t. applying buffered
      // deletes -- confirm against bufferedDeletesStream's locking:
      synchronized(bufferedDeletesStream) {
        rld.initWritableLiveDocs();
        if (rld.delete(docID)) {
          final int fullDelCount = rld.info.getDelCount() + rld.getPendingDeleteCount();
          if (fullDelCount == rld.info.info.getDocCount()) {
            // Every doc in the segment is now deleted; drop the
            // whole segment.  If a merge has already registered for this
            // segment, we leave it in the readerPool; the
            // merge will skip merging it and will then drop
            // it once it's done:
            if (!mergingSegments.contains(rld.info)) {
              segmentInfos.remove(rld.info);
              readerPool.drop(rld.info);
              checkpoint();
            }
          }
        }
        // NOTE(review): we return true even when rld.delete(docID)
        // returned false -- presumably that means the doc was already
        // deleted, so the delete is still "applied"; confirm against
        // ReadersAndLiveDocs.delete:
        //System.out.println("  yes " + info.info.name + " " + docID);
        return true;
      }
    } else {
      // Segment is live but has no pooled reader; caller must fall
      // back to deleting by Term/Query:
      //System.out.println("  no rld " + info.info.name + " " + docID);
    }
  } else {
    // Segment was merged away (or otherwise dropped) since the NRT
    // reader was opened; cannot delete by docID:
    //System.out.println("  no seg " + info.info.name + " " + docID);
  }
  return false;
}
/**
* Deletes the document(s) containing any of the
* terms. All given deletes are applied and flushed atomically

View File

@@ -27,6 +27,7 @@ import java.util.concurrent.locks.ReentrantLock;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.SegmentInfoPerCommit;
import org.apache.lucene.index.IndexReader; // javadocs
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexableField;
@@ -254,6 +255,14 @@ public class NRTManager extends ReferenceManager<IndexSearcher> {
/** Returns the current indexing generation, then advances it by one. */
long getAndIncrementGeneration() {
  final long current = indexingGen.getAndIncrement();
  return current;
}
/** Attempts the delete via {@link IndexWriter#tryDeleteDocument};
 *  returns the current indexing generation when the writer accepted
 *  the delete, or -1 when it did not (the caller must then delete
 *  by Term or Query instead). */
public long tryDeleteDocument(IndexReader reader, int docID) throws IOException {
  final boolean deleted = writer.tryDeleteDocument(reader, docID);
  return deleted ? indexingGen.get() : -1;
}
}
/**

View File

@@ -23,6 +23,9 @@ import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.memory.MemoryPostingsFormat;
import org.apache.lucene.document.*;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.*;
import org.apache.lucene.util.*;
import org.junit.Test;
@@ -48,10 +51,13 @@ public class TestRollingUpdates extends LuceneTestCase {
final int SIZE = atLeast(20);
int id = 0;
IndexReader r = null;
IndexSearcher s = null;
final int numUpdates = (int) (SIZE * (2+(TEST_NIGHTLY ? 200*random().nextDouble() : 5*random().nextDouble())));
if (VERBOSE) {
System.out.println("TEST: numUpdates=" + numUpdates);
}
int updateCount = 0;
// TODO: sometimes update ids not in order...
for(int docIter=0;docIter<numUpdates;docIter++) {
final Document doc = docs.nextDoc();
final String myID = ""+id;
@@ -60,16 +66,59 @@
} else {
id++;
}
if (VERBOSE) {
System.out.println(" docIter=" + docIter + " id=" + id);
}
((Field) doc.getField("docid")).setStringValue(myID);
w.updateDocument(new Term("docid", myID), doc);
Term idTerm = new Term("docid", myID);
final boolean doUpdate;
if (s != null && updateCount < SIZE) {
TopDocs hits = s.search(new TermQuery(idTerm), 1);
assertEquals(1, hits.totalHits);
doUpdate = !w.tryDeleteDocument(r, hits.scoreDocs[0].doc);
if (VERBOSE) {
if (doUpdate) {
System.out.println(" tryDeleteDocument failed");
} else {
System.out.println(" tryDeleteDocument succeeded");
}
}
} else {
doUpdate = true;
if (VERBOSE) {
System.out.println(" no searcher: doUpdate=true");
}
}
updateCount++;
if (doUpdate) {
w.updateDocument(idTerm, doc);
} else {
w.addDocument(doc);
}
if (docIter >= SIZE && random().nextInt(50) == 17) {
if (r != null) {
r.close();
}
final boolean applyDeletions = random().nextBoolean();
if (VERBOSE) {
System.out.println("TEST: reopen applyDeletions=" + applyDeletions);
}
r = w.getReader(applyDeletions);
if (applyDeletions) {
s = new IndexSearcher(r);
} else {
s = null;
}
assertTrue("applyDeletions=" + applyDeletions + " r.numDocs()=" + r.numDocs() + " vs SIZE=" + SIZE, !applyDeletions || r.numDocs() == SIZE);
updateCount = 0;
}
}