mirror of https://github.com/apache/lucene.git
LUCENE-4203: add IndexWriter.tryDeleteDocument
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1368745 13f79535-47bb-0310-9956-ffa450edef68
parent 4e099f3571
commit 21e7728e57
lucene/CHANGES.txt
@@ -43,6 +43,11 @@ New features
   implementations to optimize the enum implementation.  (Robert Muir,
   Mike McCandless)
 
+* LUCENE-4203: Add IndexWriter.tryDeleteDocument(AtomicReader reader,
+  int docID), to attempt deletion by docID as long as the provided
+  reader is an NRT reader, and the segment has not yet been merged
+  away (Mike McCandless).
+
 API Changes
 
 * LUCENE-4138: update of morfologik (Polish morphological analyzer) to 1.5.3.
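In application code the new entry point reads roughly as follows. This is a hedged sketch, not code from this commit; `writer`, `reader`, `docID`, and the "id" field are illustrative, with `reader` assumed to be an NRT reader obtained from DirectoryReader.open(writer, true) and `docID` a top-level document ID resolved against that same reader:

    // Try the cheap in-place delete by docID first; if the segment has
    // already been merged away this returns false, so fall back to a
    // regular Term delete, which is always correct but must re-resolve
    // the document.
    if (!writer.tryDeleteDocument(reader, docID)) {
      writer.deleteDocuments(new Term("id", idValue));
    }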
org/apache/lucene/index/AtomicReaderContext.java
@@ -1,8 +1,5 @@
 package org.apache.lucene.index;
 
-import java.util.Collections;
-import java.util.List;
-
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -20,6 +17,9 @@ import java.util.List;
  * limitations under the License.
  */
 
+import java.util.Collections;
+import java.util.List;
+
 /**
  * {@link IndexReaderContext} for {@link AtomicReader} instances
  * @lucene.experimental
@@ -51,8 +51,9 @@ public final class AtomicReaderContext extends IndexReaderContext {
 
   @Override
   public List<AtomicReaderContext> leaves() {
-    if (!isTopLevel)
+    if (!isTopLevel) {
       throw new UnsupportedOperationException("This is not a top-level context.");
+    }
     assert leaves != null;
     return leaves;
  }
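The leaves() contract shown above (only a top-level context may enumerate its leaves) is what lets a top-level docID be re-based into a single segment, which is exactly how tryDeleteDocument resolves composite readers below. A minimal sketch, assuming `topReader` is a top-level composite reader and `docID` a top-level document ID:

    // Locate the leaf (segment) holding docID, then re-base docID into
    // that leaf's local document space.
    List<AtomicReaderContext> leaves = topReader.getTopReaderContext().leaves();
    int subIndex = ReaderUtil.subIndex(docID, leaves);       // binary search over docBase
    AtomicReader leaf = leaves.get(subIndex).reader();
    int leafDocID = docID - leaves.get(subIndex).docBase;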
org/apache/lucene/index/IndexReaderContext.java
@@ -1,7 +1,5 @@
 package org.apache.lucene.index;
 
-import java.util.List;
-
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -19,6 +17,8 @@ import java.util.List;
  * limitations under the License.
  */
 
+import java.util.List;
+
 /**
  * A struct like class that represents a hierarchical relationship between
  * {@link IndexReader} instances.
org/apache/lucene/index/IndexWriter.java
@@ -1241,6 +1241,78 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
     }
   }
 
+  /** Expert: attempts to delete by document ID, as long as
+   *  the provided reader is a near-real-time reader (from {@link
+   *  DirectoryReader#open(IndexWriter,boolean)}).  If the
+   *  provided reader is an NRT reader obtained from this
+   *  writer, and its segment has not been merged away, then
+   *  the delete succeeds and this method returns true; else, it
+   *  returns false and the caller must then separately delete by
+   *  Term or Query.
+   *
+   *  <b>NOTE</b>: this method can only delete documents
+   *  visible to the currently open NRT reader.  If you need
+   *  to delete documents indexed after opening the NRT
+   *  reader you must use the other deleteDocument methods
+   *  (e.g., {@link #deleteDocuments(Term)}). */
+  public synchronized boolean tryDeleteDocument(IndexReader readerIn, int docID) throws IOException {
+
+    final AtomicReader reader;
+    if (readerIn instanceof AtomicReader) {
+      // Reader is already atomic: use the incoming docID:
+      reader = (AtomicReader) readerIn;
+    } else {
+      // Composite reader: lookup sub-reader and re-base docID:
+      List<AtomicReaderContext> leaves = readerIn.getTopReaderContext().leaves();
+      int subIndex = ReaderUtil.subIndex(docID, leaves);
+      reader = leaves.get(subIndex).reader();
+      docID -= leaves.get(subIndex).docBase;
+      assert docID >= 0;
+      assert docID < reader.maxDoc();
+    }
+
+    if (!(reader instanceof SegmentReader)) {
+      throw new IllegalArgumentException("the reader must be a SegmentReader or composite reader containing only SegmentReaders");
+    }
+
+    final SegmentInfoPerCommit info = ((SegmentReader) reader).getSegmentInfo();
+
+    // TODO: this is a slow linear search, but, number of
+    // segments should be contained unless something is
+    // seriously wrong w/ the index, so it should be a minor
+    // cost:
+
+    if (segmentInfos.indexOf(info) != -1) {
+      ReadersAndLiveDocs rld = readerPool.get(info, false);
+      if (rld != null) {
+        synchronized(bufferedDeletesStream) {
+          rld.initWritableLiveDocs();
+          if (rld.delete(docID)) {
+            final int fullDelCount = rld.info.getDelCount() + rld.getPendingDeleteCount();
+            if (fullDelCount == rld.info.info.getDocCount()) {
+              // If a merge has already registered for this
+              // segment, we leave it in the readerPool; the
+              // merge will skip merging it and will then drop
+              // it once it's done:
+              if (!mergingSegments.contains(rld.info)) {
+                segmentInfos.remove(rld.info);
+                readerPool.drop(rld.info);
+                checkpoint();
+              }
+            }
+          }
+          //System.out.println("  yes " + info.info.name + " " + docID);
+          return true;
+        }
+      } else {
+        //System.out.println("  no rld " + info.info.name + " " + docID);
+      }
+    } else {
+      //System.out.println("  no seg " + info.info.name + " " + docID);
+    }
+    return false;
+  }
+
   /**
    * Deletes the document(s) containing any of the
    * terms. All given deletes are applied and flushed atomically
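Putting the contract together, the intended calling pattern looks like this. A hedged sketch, not code from this commit; the "docid" field and variable names are illustrative, and the docID must come from a search against the very same NRT reader that is passed back in:

    // Resolve the target document against our own NRT reader...
    DirectoryReader reader = DirectoryReader.open(writer, true);  // NRT reader from this writer
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs hits = searcher.search(new TermQuery(new Term("docid", "42")), 1);

    // ...then hand the top-level docID back to the same writer. If the
    // document was not found, or its segment has since been merged away,
    // fall back to an ordinary Term delete.
    if (hits.totalHits != 1 || !writer.tryDeleteDocument(reader, hits.scoreDocs[0].doc)) {
      writer.deleteDocuments(new Term("docid", "42"));
    }

Note also the javadoc's NRT-visibility caveat: a document indexed after `reader` was opened can never be addressed this way, because no docID in `reader` refers to it.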
org/apache/lucene/search/NRTManager.java
@@ -27,6 +27,7 @@ import java.util.concurrent.locks.ReentrantLock;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.SegmentInfoPerCommit;
 import org.apache.lucene.index.IndexReader; // javadocs
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexableField;
@@ -254,6 +255,14 @@ public class NRTManager extends ReferenceManager<IndexSearcher> {
     long getAndIncrementGeneration() {
       return indexingGen.getAndIncrement();
     }
+
+    public long tryDeleteDocument(IndexReader reader, int docID) throws IOException {
+      if (writer.tryDeleteDocument(reader, docID)) {
+        return indexingGen.get();
+      } else {
+        return -1;
+      }
+    }
   }
 
   /**
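The return convention matches NRTManager's other tracked operations: a non-negative generation can be waited on, while -1 signals that the docID delete did not apply and the caller must fall back to a Term or Query delete. A minimal sketch under that assumption; `trackingWriter` and `manager` are illustrative handles to the tracking writer and NRTManager above:

    long gen = trackingWriter.tryDeleteDocument(reader, docID);
    if (gen != -1) {
      manager.waitForGeneration(gen);  // next acquired searcher reflects the delete
      IndexSearcher searcher = manager.acquire();
      try {
        // search without the deleted document
      } finally {
        manager.release(searcher);
      }
    }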
org/apache/lucene/index/TestRollingUpdates.java
@@ -23,6 +23,9 @@ import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.codecs.memory.MemoryPostingsFormat;
 import org.apache.lucene.document.*;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.store.*;
 import org.apache.lucene.util.*;
 import org.junit.Test;
@@ -48,10 +51,13 @@ public class TestRollingUpdates extends LuceneTestCase {
     final int SIZE = atLeast(20);
     int id = 0;
     IndexReader r = null;
+    IndexSearcher s = null;
     final int numUpdates = (int) (SIZE * (2+(TEST_NIGHTLY ? 200*random().nextDouble() : 5*random().nextDouble())));
     if (VERBOSE) {
       System.out.println("TEST: numUpdates=" + numUpdates);
     }
+    int updateCount = 0;
+    // TODO: sometimes update ids not in order...
     for(int docIter=0;docIter<numUpdates;docIter++) {
       final Document doc = docs.nextDoc();
       final String myID = ""+id;
@@ -60,16 +66,59 @@ public class TestRollingUpdates extends LuceneTestCase {
       } else {
         id++;
       }
       if (VERBOSE) {
         System.out.println("  docIter=" + docIter + " id=" + id);
       }
       ((Field) doc.getField("docid")).setStringValue(myID);
-      w.updateDocument(new Term("docid", myID), doc);
+
+      Term idTerm = new Term("docid", myID);
+
+      final boolean doUpdate;
+      if (s != null && updateCount < SIZE) {
+        TopDocs hits = s.search(new TermQuery(idTerm), 1);
+        assertEquals(1, hits.totalHits);
+        doUpdate = !w.tryDeleteDocument(r, hits.scoreDocs[0].doc);
+        if (VERBOSE) {
+          if (doUpdate) {
+            System.out.println("  tryDeleteDocument failed");
+          } else {
+            System.out.println("  tryDeleteDocument succeeded");
+          }
+        }
+      } else {
+        doUpdate = true;
+        if (VERBOSE) {
+          System.out.println("  no searcher: doUpdate=true");
+        }
+      }
+
+      updateCount++;
+
+      if (doUpdate) {
+        w.updateDocument(idTerm, doc);
+      } else {
+        w.addDocument(doc);
+      }
 
       if (docIter >= SIZE && random().nextInt(50) == 17) {
         if (r != null) {
           r.close();
         }
+
         final boolean applyDeletions = random().nextBoolean();
+
         if (VERBOSE) {
           System.out.println("TEST: reopen applyDeletions=" + applyDeletions);
         }
+
         r = w.getReader(applyDeletions);
+        if (applyDeletions) {
+          s = new IndexSearcher(r);
+        } else {
+          s = null;
+        }
         assertTrue("applyDeletions=" + applyDeletions + " r.numDocs()=" + r.numDocs() + " vs SIZE=" + SIZE, !applyDeletions || r.numDocs() == SIZE);
+        updateCount = 0;
       }
     }