mirror of https://github.com/apache/lucene.git
LUCENE-4203: add IndexWriter.tryDeleteDocument
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1368745 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
4e099f3571
commit
21e7728e57
|
@ -43,6 +43,11 @@ New features
|
||||||
implementations to optimize the enum implementation. (Robert Muir,
|
implementations to optimize the enum implementation. (Robert Muir,
|
||||||
Mike McCandless)
|
Mike McCandless)
|
||||||
|
|
||||||
|
* LUCENE-4203: Add IndexWriter.tryDeleteDocument(IndexReader reader,
|
||||||
|
int docID), to attempt deletion by docID as long as the provided
|
||||||
|
reader is an NRT reader, and the segment has not yet been merged
|
||||||
|
away (Mike McCandless).
|
||||||
|
|
||||||
API Changes
|
API Changes
|
||||||
|
|
||||||
* LUCENE-4138: update of morfologik (Polish morphological analyzer) to 1.5.3.
|
* LUCENE-4138: update of morfologik (Polish morphological analyzer) to 1.5.3.
|
||||||
|
|
|
@ -1,8 +1,5 @@
|
||||||
package org.apache.lucene.index;
|
package org.apache.lucene.index;
|
||||||
|
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
@ -20,6 +17,9 @@ import java.util.List;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* {@link IndexReaderContext} for {@link AtomicReader} instances
|
* {@link IndexReaderContext} for {@link AtomicReader} instances
|
||||||
* @lucene.experimental
|
* @lucene.experimental
|
||||||
|
@ -51,8 +51,9 @@ public final class AtomicReaderContext extends IndexReaderContext {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public List<AtomicReaderContext> leaves() {
|
public List<AtomicReaderContext> leaves() {
|
||||||
if (!isTopLevel)
|
if (!isTopLevel) {
|
||||||
throw new UnsupportedOperationException("This is not a top-level context.");
|
throw new UnsupportedOperationException("This is not a top-level context.");
|
||||||
|
}
|
||||||
assert leaves != null;
|
assert leaves != null;
|
||||||
return leaves;
|
return leaves;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,7 +1,5 @@
|
||||||
package org.apache.lucene.index;
|
package org.apache.lucene.index;
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
@ -19,6 +17,8 @@ import java.util.List;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A struct like class that represents a hierarchical relationship between
|
* A struct like class that represents a hierarchical relationship between
|
||||||
* {@link IndexReader} instances.
|
* {@link IndexReader} instances.
|
||||||
|
|
|
@ -1241,6 +1241,78 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Expert: attempts to delete by document ID, as long as
|
||||||
|
* the provided reader is a near-real-time reader (from {@link
|
||||||
|
* DirectoryReader#open(IndexWriter,boolean)}). If the
|
||||||
|
* provided reader is an NRT reader obtained from this
|
||||||
|
* writer, and its segment has not been merged away, then
|
||||||
|
* the delete succeeds and this method returns true; else, it
|
||||||
|
* returns false and the caller must then separately delete by
|
||||||
|
* Term or Query.
|
||||||
|
*
|
||||||
|
* <b>NOTE</b>: this method can only delete documents
|
||||||
|
* visible to the currently open NRT reader. If you need
|
||||||
|
* to delete documents indexed after opening the NRT
|
||||||
|
* reader you must use the other deleteDocuments methods
|
||||||
|
* (e.g., {@link #deleteDocuments(Term)}). */
|
||||||
|
public synchronized boolean tryDeleteDocument(IndexReader readerIn, int docID) throws IOException {
|
||||||
|
|
||||||
|
final AtomicReader reader;
|
||||||
|
if (readerIn instanceof AtomicReader) {
|
||||||
|
// Reader is already atomic: use the incoming docID:
|
||||||
|
reader = (AtomicReader) readerIn;
|
||||||
|
} else {
|
||||||
|
// Composite reader: lookup sub-reader and re-base docID:
|
||||||
|
List<AtomicReaderContext> leaves = readerIn.getTopReaderContext().leaves();
|
||||||
|
int subIndex = ReaderUtil.subIndex(docID, leaves);
|
||||||
|
reader = leaves.get(subIndex).reader();
|
||||||
|
docID -= leaves.get(subIndex).docBase;
|
||||||
|
assert docID >= 0;
|
||||||
|
assert docID < reader.maxDoc();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!(reader instanceof SegmentReader)) {
|
||||||
|
throw new IllegalArgumentException("the reader must be a SegmentReader or composite reader containing only SegmentReaders");
|
||||||
|
}
|
||||||
|
|
||||||
|
final SegmentInfoPerCommit info = ((SegmentReader) reader).getSegmentInfo();
|
||||||
|
|
||||||
|
// TODO: this is a slow linear search, but, number of
|
||||||
|
// segments should be small unless something is
|
||||||
|
// seriously wrong w/ the index, so it should be a minor
|
||||||
|
// cost:
|
||||||
|
|
||||||
|
if (segmentInfos.indexOf(info) != -1) {
|
||||||
|
ReadersAndLiveDocs rld = readerPool.get(info, false);
|
||||||
|
if (rld != null) {
|
||||||
|
synchronized(bufferedDeletesStream) {
|
||||||
|
rld.initWritableLiveDocs();
|
||||||
|
if (rld.delete(docID)) {
|
||||||
|
final int fullDelCount = rld.info.getDelCount() + rld.getPendingDeleteCount();
|
||||||
|
if (fullDelCount == rld.info.info.getDocCount()) {
|
||||||
|
// If a merge has already registered for this
|
||||||
|
// segment, we leave it in the readerPool; the
|
||||||
|
// merge will skip merging it and will then drop
|
||||||
|
// it once it's done:
|
||||||
|
if (!mergingSegments.contains(rld.info)) {
|
||||||
|
segmentInfos.remove(rld.info);
|
||||||
|
readerPool.drop(rld.info);
|
||||||
|
checkpoint();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
//System.out.println(" yes " + info.info.name + " " + docID);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
//System.out.println(" no rld " + info.info.name + " " + docID);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
//System.out.println(" no seg " + info.info.name + " " + docID);
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Deletes the document(s) containing any of the
|
* Deletes the document(s) containing any of the
|
||||||
* terms. All given deletes are applied and flushed atomically
|
* terms. All given deletes are applied and flushed atomically
|
||||||
|
|
|
@ -27,6 +27,7 @@ import java.util.concurrent.locks.ReentrantLock;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.index.DirectoryReader;
|
import org.apache.lucene.index.DirectoryReader;
|
||||||
|
import org.apache.lucene.index.SegmentInfoPerCommit;
|
||||||
import org.apache.lucene.index.IndexReader; // javadocs
|
import org.apache.lucene.index.IndexReader; // javadocs
|
||||||
import org.apache.lucene.index.IndexWriter;
|
import org.apache.lucene.index.IndexWriter;
|
||||||
import org.apache.lucene.index.IndexableField;
|
import org.apache.lucene.index.IndexableField;
|
||||||
|
@ -254,6 +255,14 @@ public class NRTManager extends ReferenceManager<IndexSearcher> {
|
||||||
long getAndIncrementGeneration() {
|
long getAndIncrementGeneration() {
|
||||||
return indexingGen.getAndIncrement();
|
return indexingGen.getAndIncrement();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public long tryDeleteDocument(IndexReader reader, int docID) throws IOException {
|
||||||
|
if (writer.tryDeleteDocument(reader, docID)) {
|
||||||
|
return indexingGen.get();
|
||||||
|
} else {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -23,6 +23,9 @@ import org.apache.lucene.analysis.MockAnalyzer;
|
||||||
import org.apache.lucene.codecs.Codec;
|
import org.apache.lucene.codecs.Codec;
|
||||||
import org.apache.lucene.codecs.memory.MemoryPostingsFormat;
|
import org.apache.lucene.codecs.memory.MemoryPostingsFormat;
|
||||||
import org.apache.lucene.document.*;
|
import org.apache.lucene.document.*;
|
||||||
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
|
import org.apache.lucene.search.TermQuery;
|
||||||
|
import org.apache.lucene.search.TopDocs;
|
||||||
import org.apache.lucene.store.*;
|
import org.apache.lucene.store.*;
|
||||||
import org.apache.lucene.util.*;
|
import org.apache.lucene.util.*;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
@ -48,10 +51,13 @@ public class TestRollingUpdates extends LuceneTestCase {
|
||||||
final int SIZE = atLeast(20);
|
final int SIZE = atLeast(20);
|
||||||
int id = 0;
|
int id = 0;
|
||||||
IndexReader r = null;
|
IndexReader r = null;
|
||||||
|
IndexSearcher s = null;
|
||||||
final int numUpdates = (int) (SIZE * (2+(TEST_NIGHTLY ? 200*random().nextDouble() : 5*random().nextDouble())));
|
final int numUpdates = (int) (SIZE * (2+(TEST_NIGHTLY ? 200*random().nextDouble() : 5*random().nextDouble())));
|
||||||
if (VERBOSE) {
|
if (VERBOSE) {
|
||||||
System.out.println("TEST: numUpdates=" + numUpdates);
|
System.out.println("TEST: numUpdates=" + numUpdates);
|
||||||
}
|
}
|
||||||
|
int updateCount = 0;
|
||||||
|
// TODO: sometimes update ids not in order...
|
||||||
for(int docIter=0;docIter<numUpdates;docIter++) {
|
for(int docIter=0;docIter<numUpdates;docIter++) {
|
||||||
final Document doc = docs.nextDoc();
|
final Document doc = docs.nextDoc();
|
||||||
final String myID = ""+id;
|
final String myID = ""+id;
|
||||||
|
@ -60,16 +66,59 @@ public class TestRollingUpdates extends LuceneTestCase {
|
||||||
} else {
|
} else {
|
||||||
id++;
|
id++;
|
||||||
}
|
}
|
||||||
|
if (VERBOSE) {
|
||||||
|
System.out.println(" docIter=" + docIter + " id=" + id);
|
||||||
|
}
|
||||||
((Field) doc.getField("docid")).setStringValue(myID);
|
((Field) doc.getField("docid")).setStringValue(myID);
|
||||||
w.updateDocument(new Term("docid", myID), doc);
|
|
||||||
|
Term idTerm = new Term("docid", myID);
|
||||||
|
|
||||||
|
final boolean doUpdate;
|
||||||
|
if (s != null && updateCount < SIZE) {
|
||||||
|
TopDocs hits = s.search(new TermQuery(idTerm), 1);
|
||||||
|
assertEquals(1, hits.totalHits);
|
||||||
|
doUpdate = !w.tryDeleteDocument(r, hits.scoreDocs[0].doc);
|
||||||
|
if (VERBOSE) {
|
||||||
|
if (doUpdate) {
|
||||||
|
System.out.println(" tryDeleteDocument failed");
|
||||||
|
} else {
|
||||||
|
System.out.println(" tryDeleteDocument succeeded");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
doUpdate = true;
|
||||||
|
if (VERBOSE) {
|
||||||
|
System.out.println(" no searcher: doUpdate=true");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
updateCount++;
|
||||||
|
|
||||||
|
if (doUpdate) {
|
||||||
|
w.updateDocument(idTerm, doc);
|
||||||
|
} else {
|
||||||
|
w.addDocument(doc);
|
||||||
|
}
|
||||||
|
|
||||||
if (docIter >= SIZE && random().nextInt(50) == 17) {
|
if (docIter >= SIZE && random().nextInt(50) == 17) {
|
||||||
if (r != null) {
|
if (r != null) {
|
||||||
r.close();
|
r.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
final boolean applyDeletions = random().nextBoolean();
|
final boolean applyDeletions = random().nextBoolean();
|
||||||
|
|
||||||
|
if (VERBOSE) {
|
||||||
|
System.out.println("TEST: reopen applyDeletions=" + applyDeletions);
|
||||||
|
}
|
||||||
|
|
||||||
r = w.getReader(applyDeletions);
|
r = w.getReader(applyDeletions);
|
||||||
|
if (applyDeletions) {
|
||||||
|
s = new IndexSearcher(r);
|
||||||
|
} else {
|
||||||
|
s = null;
|
||||||
|
}
|
||||||
assertTrue("applyDeletions=" + applyDeletions + " r.numDocs()=" + r.numDocs() + " vs SIZE=" + SIZE, !applyDeletions || r.numDocs() == SIZE);
|
assertTrue("applyDeletions=" + applyDeletions + " r.numDocs()=" + r.numDocs() + " vs SIZE=" + SIZE, !applyDeletions || r.numDocs() == SIZE);
|
||||||
|
updateCount = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue