mirror of https://github.com/apache/lucene.git
LUCENE-992: move buffered deletes into DocumentsWriter so IndexWriter.updateDocument is atomic
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@574260 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
bd493d6c80
commit
e7179ea866
|
@ -83,6 +83,9 @@ Bug fixes
|
||||||
|
|
||||||
13. LUCENE-991: The explain() method of BoostingTermQuery had errors when no payloads were present on a document. (Peter Keegan via Grant Ingersoll)
|
13. LUCENE-991: The explain() method of BoostingTermQuery had errors when no payloads were present on a document. (Peter Keegan via Grant Ingersoll)
|
||||||
|
|
||||||
|
14. LUCENE-992: Fixed IndexWriter.updateDocument to be atomic again
|
||||||
|
(this was broken by LUCENE-843). (Ning Li via Mike McCandless)
|
||||||
|
|
||||||
New features
|
New features
|
||||||
|
|
||||||
1. LUCENE-906: Elision filter for French.
|
1. LUCENE-906: Elision filter for French.
|
||||||
|
|
|
@ -129,6 +129,16 @@ final class DocumentsWriter {
|
||||||
|
|
||||||
private PrintStream infoStream;
|
private PrintStream infoStream;
|
||||||
|
|
||||||
|
// This HashMap buffers delete terms in RAM before they
|
||||||
|
// are applied. The key is delete term; the value is
|
||||||
|
// number of buffered documents the term applies to.
|
||||||
|
private HashMap bufferedDeleteTerms = new HashMap();
|
||||||
|
private int numBufferedDeleteTerms = 0;
|
||||||
|
|
||||||
|
// The max number of delete terms that can be buffered before
|
||||||
|
// they must be flushed to disk.
|
||||||
|
private int maxBufferedDeleteTerms = IndexWriter.DEFAULT_MAX_BUFFERED_DELETE_TERMS;
|
||||||
|
|
||||||
// How much RAM we can use before flushing. This is 0 if
|
// How much RAM we can use before flushing. This is 0 if
|
||||||
// we are flushing by doc count instead.
|
// we are flushing by doc count instead.
|
||||||
private long ramBufferSize = (long) (IndexWriter.DEFAULT_RAM_BUFFER_SIZE_MB*1024*1024);
|
private long ramBufferSize = (long) (IndexWriter.DEFAULT_RAM_BUFFER_SIZE_MB*1024*1024);
|
||||||
|
@ -265,8 +275,8 @@ final class DocumentsWriter {
|
||||||
|
|
||||||
/** Called if we hit an exception when adding docs,
|
/** Called if we hit an exception when adding docs,
|
||||||
* flushing, etc. This resets our state, discarding any
|
* flushing, etc. This resets our state, discarding any
|
||||||
* * docs added since last flush. */
|
* docs added since last flush. */
|
||||||
void abort() throws IOException {
|
synchronized void abort() throws IOException {
|
||||||
|
|
||||||
// Forcefully remove waiting ThreadStates from line
|
// Forcefully remove waiting ThreadStates from line
|
||||||
for(int i=0;i<numWaiting;i++)
|
for(int i=0;i<numWaiting;i++)
|
||||||
|
@ -275,6 +285,9 @@ final class DocumentsWriter {
|
||||||
|
|
||||||
pauseAllThreads();
|
pauseAllThreads();
|
||||||
|
|
||||||
|
bufferedDeleteTerms.clear();
|
||||||
|
numBufferedDeleteTerms = 0;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
|
||||||
// Discard pending norms:
|
// Discard pending norms:
|
||||||
|
@ -2063,8 +2076,10 @@ final class DocumentsWriter {
|
||||||
|
|
||||||
/** Returns a free (idle) ThreadState that may be used for
|
/** Returns a free (idle) ThreadState that may be used for
|
||||||
* indexing this one document. This call also pauses if a
|
* indexing this one document. This call also pauses if a
|
||||||
* flush is pending. */
|
* flush is pending. If delTerm is non-null then we
|
||||||
synchronized ThreadState getThreadState(Document doc) throws IOException {
|
* buffer this deleted term after the thread state has
|
||||||
|
* been acquired. */
|
||||||
|
synchronized ThreadState getThreadState(Document doc, Term delTerm) throws IOException {
|
||||||
|
|
||||||
// First, find a thread state. If this thread already
|
// First, find a thread state. If this thread already
|
||||||
// has affinity to a specific ThreadState, use that one
|
// has affinity to a specific ThreadState, use that one
|
||||||
|
@ -2134,6 +2149,9 @@ final class DocumentsWriter {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (delTerm != null)
|
||||||
|
addDeleteTerm(delTerm, state.docID);
|
||||||
|
|
||||||
return state;
|
return state;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2141,9 +2159,19 @@ final class DocumentsWriter {
|
||||||
* flush. */
|
* flush. */
|
||||||
boolean addDocument(Document doc, Analyzer analyzer)
|
boolean addDocument(Document doc, Analyzer analyzer)
|
||||||
throws CorruptIndexException, IOException {
|
throws CorruptIndexException, IOException {
|
||||||
|
return updateDocument(doc, analyzer, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
boolean updateDocument(Term t, Document doc, Analyzer analyzer)
|
||||||
|
throws CorruptIndexException, IOException {
|
||||||
|
return updateDocument(doc, analyzer, t);
|
||||||
|
}
|
||||||
|
|
||||||
|
boolean updateDocument(Document doc, Analyzer analyzer, Term delTerm)
|
||||||
|
throws CorruptIndexException, IOException {
|
||||||
|
|
||||||
// This call is synchronized but fast
|
// This call is synchronized but fast
|
||||||
final ThreadState state = getThreadState(doc);
|
final ThreadState state = getThreadState(doc, delTerm);
|
||||||
boolean success = false;
|
boolean success = false;
|
||||||
try {
|
try {
|
||||||
// This call is not synchronized and does all the work
|
// This call is not synchronized and does all the work
|
||||||
|
@ -2157,7 +2185,96 @@ final class DocumentsWriter {
|
||||||
abort();
|
abort();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return state.doFlushAfter;
|
return state.doFlushAfter || timeToFlushDeletes();
|
||||||
|
}
|
||||||
|
|
||||||
|
synchronized int getNumBufferedDeleteTerms() {
|
||||||
|
return numBufferedDeleteTerms;
|
||||||
|
}
|
||||||
|
|
||||||
|
synchronized HashMap getBufferedDeleteTerms() {
|
||||||
|
return bufferedDeleteTerms;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reset buffered deletes.
|
||||||
|
synchronized void clearBufferedDeleteTerms() {
|
||||||
|
bufferedDeleteTerms.clear();
|
||||||
|
numBufferedDeleteTerms = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
synchronized boolean bufferDeleteTerms(Term[] terms) throws IOException {
|
||||||
|
while(pauseThreads != 0 || flushPending)
|
||||||
|
try {
|
||||||
|
wait();
|
||||||
|
} catch (InterruptedException e) {}
|
||||||
|
for (int i = 0; i < terms.length; i++)
|
||||||
|
addDeleteTerm(terms[i], numDocsInRAM);
|
||||||
|
return timeToFlushDeletes();
|
||||||
|
}
|
||||||
|
|
||||||
|
synchronized boolean bufferDeleteTerm(Term term) throws IOException {
|
||||||
|
while(pauseThreads != 0 || flushPending)
|
||||||
|
try {
|
||||||
|
wait();
|
||||||
|
} catch (InterruptedException e) {}
|
||||||
|
addDeleteTerm(term, numDocsInRAM);
|
||||||
|
return timeToFlushDeletes();
|
||||||
|
}
|
||||||
|
|
||||||
|
synchronized private boolean timeToFlushDeletes() {
|
||||||
|
return numBufferedDeleteTerms >= maxBufferedDeleteTerms && setFlushPending();
|
||||||
|
}
|
||||||
|
|
||||||
|
void setMaxBufferedDeleteTerms(int maxBufferedDeleteTerms) {
|
||||||
|
if (maxBufferedDeleteTerms < 1)
|
||||||
|
throw new IllegalArgumentException("maxBufferedDeleteTerms must at least be 1");
|
||||||
|
this.maxBufferedDeleteTerms = maxBufferedDeleteTerms;
|
||||||
|
}
|
||||||
|
|
||||||
|
int getMaxBufferedDeleteTerms() {
|
||||||
|
return maxBufferedDeleteTerms;
|
||||||
|
}
|
||||||
|
|
||||||
|
synchronized boolean hasDeletes() {
|
||||||
|
return bufferedDeleteTerms.size() > 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Number of documents a delete term applies to.
|
||||||
|
static class Num {
|
||||||
|
private int num;
|
||||||
|
|
||||||
|
Num(int num) {
|
||||||
|
this.num = num;
|
||||||
|
}
|
||||||
|
|
||||||
|
int getNum() {
|
||||||
|
return num;
|
||||||
|
}
|
||||||
|
|
||||||
|
void setNum(int num) {
|
||||||
|
// Only record the new number if it's greater than the
|
||||||
|
// current one. This is important because if multiple
|
||||||
|
// threads are replacing the same doc at nearly the
|
||||||
|
// same time, it's possible that one thread that got a
|
||||||
|
// higher docID is scheduled before the other
|
||||||
|
// threads.
|
||||||
|
if (num > this.num)
|
||||||
|
this.num = num;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Buffer a term in bufferedDeleteTerms, which records the
|
||||||
|
// current number of documents buffered in ram so that the
|
||||||
|
// delete term will be applied to those documents as well
|
||||||
|
// as the disk segments.
|
||||||
|
synchronized private void addDeleteTerm(Term term, int docCount) {
|
||||||
|
Num num = (Num) bufferedDeleteTerms.get(term);
|
||||||
|
if (num == null) {
|
||||||
|
bufferedDeleteTerms.put(term, new Num(docCount));
|
||||||
|
} else {
|
||||||
|
num.setNum(docCount);
|
||||||
|
}
|
||||||
|
numBufferedDeleteTerms++;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Does the synchronized work to finish/flush the
|
/** Does the synchronized work to finish/flush the
|
||||||
|
|
|
@ -247,16 +247,6 @@ public class IndexWriter {
|
||||||
|
|
||||||
private int termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL;
|
private int termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL;
|
||||||
|
|
||||||
// The max number of delete terms that can be buffered before
|
|
||||||
// they must be flushed to disk.
|
|
||||||
private int maxBufferedDeleteTerms = DEFAULT_MAX_BUFFERED_DELETE_TERMS;
|
|
||||||
|
|
||||||
// This Hashmap buffers delete terms in ram before they are applied.
|
|
||||||
// The key is delete term; the value is number of ram
|
|
||||||
// segments the term applies to.
|
|
||||||
private HashMap bufferedDeleteTerms = new HashMap();
|
|
||||||
private int numBufferedDeleteTerms = 0;
|
|
||||||
|
|
||||||
/** Use compound file setting. Defaults to true, minimizing the number of
|
/** Use compound file setting. Defaults to true, minimizing the number of
|
||||||
* files used. Setting this to false may improve indexing performance, but
|
* files used. Setting this to false may improve indexing performance, but
|
||||||
* may also cause file handle problems.
|
* may also cause file handle problems.
|
||||||
|
@ -773,9 +763,7 @@ public class IndexWriter {
|
||||||
*/
|
*/
|
||||||
public void setMaxBufferedDeleteTerms(int maxBufferedDeleteTerms) {
|
public void setMaxBufferedDeleteTerms(int maxBufferedDeleteTerms) {
|
||||||
ensureOpen();
|
ensureOpen();
|
||||||
if (maxBufferedDeleteTerms < 1)
|
docWriter.setMaxBufferedDeleteTerms(maxBufferedDeleteTerms);
|
||||||
throw new IllegalArgumentException("maxBufferedDeleteTerms must at least be 1");
|
|
||||||
this.maxBufferedDeleteTerms = maxBufferedDeleteTerms;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -785,7 +773,7 @@ public class IndexWriter {
|
||||||
*/
|
*/
|
||||||
public int getMaxBufferedDeleteTerms() {
|
public int getMaxBufferedDeleteTerms() {
|
||||||
ensureOpen();
|
ensureOpen();
|
||||||
return maxBufferedDeleteTerms;
|
return docWriter.getMaxBufferedDeleteTerms();
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Determines how often segment indices are merged by addDocument(). With
|
/** Determines how often segment indices are merged by addDocument(). With
|
||||||
|
@ -1134,10 +1122,11 @@ public class IndexWriter {
|
||||||
* @throws CorruptIndexException if the index is corrupt
|
* @throws CorruptIndexException if the index is corrupt
|
||||||
* @throws IOException if there is a low-level IO error
|
* @throws IOException if there is a low-level IO error
|
||||||
*/
|
*/
|
||||||
public synchronized void deleteDocuments(Term term) throws CorruptIndexException, IOException {
|
public void deleteDocuments(Term term) throws CorruptIndexException, IOException {
|
||||||
ensureOpen();
|
ensureOpen();
|
||||||
bufferDeleteTerm(term);
|
boolean doFlush = docWriter.bufferDeleteTerm(term);
|
||||||
maybeFlush();
|
if (doFlush)
|
||||||
|
flush(true, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -1148,12 +1137,11 @@ public class IndexWriter {
|
||||||
* @throws CorruptIndexException if the index is corrupt
|
* @throws CorruptIndexException if the index is corrupt
|
||||||
* @throws IOException if there is a low-level IO error
|
* @throws IOException if there is a low-level IO error
|
||||||
*/
|
*/
|
||||||
public synchronized void deleteDocuments(Term[] terms) throws CorruptIndexException, IOException {
|
public void deleteDocuments(Term[] terms) throws CorruptIndexException, IOException {
|
||||||
ensureOpen();
|
ensureOpen();
|
||||||
for (int i = 0; i < terms.length; i++) {
|
boolean doFlush = docWriter.bufferDeleteTerms(terms);
|
||||||
bufferDeleteTerm(terms[i]);
|
if (doFlush)
|
||||||
}
|
flush(true, false);
|
||||||
maybeFlush();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -1189,20 +1177,15 @@ public class IndexWriter {
|
||||||
public void updateDocument(Term term, Document doc, Analyzer analyzer)
|
public void updateDocument(Term term, Document doc, Analyzer analyzer)
|
||||||
throws CorruptIndexException, IOException {
|
throws CorruptIndexException, IOException {
|
||||||
ensureOpen();
|
ensureOpen();
|
||||||
synchronized (this) {
|
boolean doFlush = false;
|
||||||
bufferDeleteTerm(term);
|
|
||||||
}
|
|
||||||
boolean success = false;
|
|
||||||
try {
|
try {
|
||||||
success = docWriter.addDocument(doc, analyzer);
|
doFlush = docWriter.updateDocument(term, doc, analyzer);
|
||||||
} catch (IOException ioe) {
|
} catch (IOException ioe) {
|
||||||
deleter.refresh();
|
deleter.refresh();
|
||||||
throw ioe;
|
throw ioe;
|
||||||
}
|
}
|
||||||
if (success)
|
if (doFlush)
|
||||||
flush(true, false);
|
flush(true, false);
|
||||||
else
|
|
||||||
maybeFlush();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// for test purpose
|
// for test purpose
|
||||||
|
@ -1357,7 +1340,7 @@ public class IndexWriter {
|
||||||
*/
|
*/
|
||||||
private void startTransaction() throws IOException {
|
private void startTransaction() throws IOException {
|
||||||
|
|
||||||
assert numBufferedDeleteTerms == 0 :
|
assert docWriter.getNumBufferedDeleteTerms() == 0 :
|
||||||
"calling startTransaction with buffered delete terms not supported";
|
"calling startTransaction with buffered delete terms not supported";
|
||||||
assert docWriter.getNumDocsInRAM() == 0 :
|
assert docWriter.getNumDocsInRAM() == 0 :
|
||||||
"calling startTransaction with buffered documents not supported";
|
"calling startTransaction with buffered documents not supported";
|
||||||
|
@ -1462,9 +1445,6 @@ public class IndexWriter {
|
||||||
deleter.checkpoint(segmentInfos, false);
|
deleter.checkpoint(segmentInfos, false);
|
||||||
deleter.refresh();
|
deleter.refresh();
|
||||||
|
|
||||||
bufferedDeleteTerms.clear();
|
|
||||||
numBufferedDeleteTerms = 0;
|
|
||||||
|
|
||||||
commitPending = false;
|
commitPending = false;
|
||||||
docWriter.abort();
|
docWriter.abort();
|
||||||
close();
|
close();
|
||||||
|
@ -1845,20 +1825,6 @@ public class IndexWriter {
|
||||||
throws IOException {
|
throws IOException {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Used internally to trigger a flush if the number of
|
|
||||||
* buffered added documents or buffered deleted terms are
|
|
||||||
* large enough.
|
|
||||||
*/
|
|
||||||
protected final synchronized void maybeFlush() throws CorruptIndexException, IOException {
|
|
||||||
// We only check for flush due to number of buffered
|
|
||||||
// delete terms, because triggering of a flush due to
|
|
||||||
// too many added documents is handled by
|
|
||||||
// DocumentsWriter
|
|
||||||
if (numBufferedDeleteTerms >= maxBufferedDeleteTerms && docWriter.setFlushPending())
|
|
||||||
flush(true, false);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Flush all in-memory buffered updates (adds and deletes)
|
* Flush all in-memory buffered updates (adds and deletes)
|
||||||
* to the Directory.
|
* to the Directory.
|
||||||
|
@ -1908,7 +1874,7 @@ public class IndexWriter {
|
||||||
// when they are full or writer is being closed. We
|
// when they are full or writer is being closed. We
|
||||||
// have to fix the "applyDeletesSelectively" logic to
|
// have to fix the "applyDeletesSelectively" logic to
|
||||||
// apply to more than just the last flushed segment
|
// apply to more than just the last flushed segment
|
||||||
boolean flushDeletes = bufferedDeleteTerms.size() > 0;
|
boolean flushDeletes = docWriter.hasDeletes();
|
||||||
|
|
||||||
if (infoStream != null)
|
if (infoStream != null)
|
||||||
infoStream.println(" flush: flushDocs=" + flushDocs +
|
infoStream.println(" flush: flushDocs=" + flushDocs +
|
||||||
|
@ -1938,9 +1904,6 @@ public class IndexWriter {
|
||||||
|
|
||||||
SegmentInfos rollback = null;
|
SegmentInfos rollback = null;
|
||||||
|
|
||||||
HashMap saveBufferedDeleteTerms = null;
|
|
||||||
int saveNumBufferedDeleteTerms = 0;
|
|
||||||
|
|
||||||
if (flushDeletes)
|
if (flushDeletes)
|
||||||
rollback = (SegmentInfos) segmentInfos.clone();
|
rollback = (SegmentInfos) segmentInfos.clone();
|
||||||
|
|
||||||
|
@ -1975,9 +1938,9 @@ public class IndexWriter {
|
||||||
// buffer deletes longer and then flush them to
|
// buffer deletes longer and then flush them to
|
||||||
// multiple flushed segments, when
|
// multiple flushed segments, when
|
||||||
// autoCommit=false
|
// autoCommit=false
|
||||||
saveBufferedDeleteTerms = bufferedDeleteTerms;
|
int delCount = applyDeletes(flushDocs);
|
||||||
saveNumBufferedDeleteTerms = numBufferedDeleteTerms;
|
if (infoStream != null)
|
||||||
applyDeletes(flushDocs);
|
infoStream.println("flushed " + delCount + " deleted documents");
|
||||||
doAfterFlush();
|
doAfterFlush();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1991,11 +1954,6 @@ public class IndexWriter {
|
||||||
// SegmentInfo instances:
|
// SegmentInfo instances:
|
||||||
segmentInfos.clear();
|
segmentInfos.clear();
|
||||||
segmentInfos.addAll(rollback);
|
segmentInfos.addAll(rollback);
|
||||||
|
|
||||||
if (saveBufferedDeleteTerms != null) {
|
|
||||||
numBufferedDeleteTerms = saveNumBufferedDeleteTerms;
|
|
||||||
bufferedDeleteTerms = saveBufferedDeleteTerms;
|
|
||||||
}
|
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
// Remove segment we added, if any:
|
// Remove segment we added, if any:
|
||||||
|
@ -2319,11 +2277,14 @@ public class IndexWriter {
|
||||||
// flushedNewSegment is true then a new segment was just
|
// flushedNewSegment is true then a new segment was just
|
||||||
// created and flushed from the ram segments, so we will
|
// created and flushed from the ram segments, so we will
|
||||||
// selectively apply the deletes to that new segment.
|
// selectively apply the deletes to that new segment.
|
||||||
private final void applyDeletes(boolean flushedNewSegment) throws CorruptIndexException, IOException {
|
private final int applyDeletes(boolean flushedNewSegment) throws CorruptIndexException, IOException {
|
||||||
|
|
||||||
|
final HashMap bufferedDeleteTerms = docWriter.getBufferedDeleteTerms();
|
||||||
|
|
||||||
|
int delCount = 0;
|
||||||
if (bufferedDeleteTerms.size() > 0) {
|
if (bufferedDeleteTerms.size() > 0) {
|
||||||
if (infoStream != null)
|
if (infoStream != null)
|
||||||
infoStream.println("flush " + numBufferedDeleteTerms + " buffered deleted terms on "
|
infoStream.println("flush " + docWriter.getNumBufferedDeleteTerms() + " buffered deleted terms on "
|
||||||
+ segmentInfos.size() + " segments.");
|
+ segmentInfos.size() + " segments.");
|
||||||
|
|
||||||
if (flushedNewSegment) {
|
if (flushedNewSegment) {
|
||||||
|
@ -2337,7 +2298,7 @@ public class IndexWriter {
|
||||||
// Apply delete terms to the segment just flushed from ram
|
// Apply delete terms to the segment just flushed from ram
|
||||||
// apply appropriately so that a delete term is only applied to
|
// apply appropriately so that a delete term is only applied to
|
||||||
// the documents buffered before it, not those buffered after it.
|
// the documents buffered before it, not those buffered after it.
|
||||||
applyDeletesSelectively(bufferedDeleteTerms, reader);
|
delCount += applyDeletesSelectively(bufferedDeleteTerms, reader);
|
||||||
} finally {
|
} finally {
|
||||||
if (reader != null) {
|
if (reader != null) {
|
||||||
try {
|
try {
|
||||||
|
@ -2361,7 +2322,7 @@ public class IndexWriter {
|
||||||
|
|
||||||
// Apply delete terms to disk segments
|
// Apply delete terms to disk segments
|
||||||
// except the one just flushed from ram.
|
// except the one just flushed from ram.
|
||||||
applyDeletes(bufferedDeleteTerms, reader);
|
delCount += applyDeletes(bufferedDeleteTerms, reader);
|
||||||
} finally {
|
} finally {
|
||||||
if (reader != null) {
|
if (reader != null) {
|
||||||
try {
|
try {
|
||||||
|
@ -2374,15 +2335,10 @@ public class IndexWriter {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Clean up bufferedDeleteTerms.
|
// Clean up bufferedDeleteTerms.
|
||||||
|
docWriter.clearBufferedDeleteTerms();
|
||||||
// Rollbacks of buffered deletes are based on restoring the old
|
|
||||||
// map, so don't modify this one. Rare enough that the gc
|
|
||||||
// overhead is almost certainly lower than the alternate, which
|
|
||||||
// would be clone to support rollback.
|
|
||||||
|
|
||||||
bufferedDeleteTerms = new HashMap();
|
|
||||||
numBufferedDeleteTerms = 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return delCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
private final boolean checkNonDecreasingLevels(int start) {
|
private final boolean checkNonDecreasingLevels(int start) {
|
||||||
|
@ -2410,59 +2366,28 @@ public class IndexWriter {
|
||||||
|
|
||||||
// For test purposes.
|
// For test purposes.
|
||||||
final synchronized int getBufferedDeleteTermsSize() {
|
final synchronized int getBufferedDeleteTermsSize() {
|
||||||
return bufferedDeleteTerms.size();
|
return docWriter.getBufferedDeleteTerms().size();
|
||||||
}
|
}
|
||||||
|
|
||||||
// For test purposes.
|
// For test purposes.
|
||||||
final synchronized int getNumBufferedDeleteTerms() {
|
final synchronized int getNumBufferedDeleteTerms() {
|
||||||
return numBufferedDeleteTerms;
|
return docWriter.getNumBufferedDeleteTerms();
|
||||||
}
|
|
||||||
|
|
||||||
// Number of ram segments a delete term applies to.
|
|
||||||
private static class Num {
|
|
||||||
private int num;
|
|
||||||
|
|
||||||
Num(int num) {
|
|
||||||
this.num = num;
|
|
||||||
}
|
|
||||||
|
|
||||||
int getNum() {
|
|
||||||
return num;
|
|
||||||
}
|
|
||||||
|
|
||||||
void setNum(int num) {
|
|
||||||
this.num = num;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Buffer a term in bufferedDeleteTerms, which records the
|
|
||||||
// current number of documents buffered in ram so that the
|
|
||||||
// delete term will be applied to those ram segments as
|
|
||||||
// well as the disk segments.
|
|
||||||
private void bufferDeleteTerm(Term term) {
|
|
||||||
Num num = (Num) bufferedDeleteTerms.get(term);
|
|
||||||
int numDoc = docWriter.getNumDocsInRAM();
|
|
||||||
if (num == null) {
|
|
||||||
bufferedDeleteTerms.put(term, new Num(numDoc));
|
|
||||||
} else {
|
|
||||||
num.setNum(numDoc);
|
|
||||||
}
|
|
||||||
numBufferedDeleteTerms++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Apply buffered delete terms to the segment just flushed from ram
|
// Apply buffered delete terms to the segment just flushed from ram
|
||||||
// apply appropriately so that a delete term is only applied to
|
// apply appropriately so that a delete term is only applied to
|
||||||
// the documents buffered before it, not those buffered after it.
|
// the documents buffered before it, not those buffered after it.
|
||||||
private final void applyDeletesSelectively(HashMap deleteTerms,
|
private final int applyDeletesSelectively(HashMap deleteTerms,
|
||||||
IndexReader reader) throws CorruptIndexException, IOException {
|
IndexReader reader) throws CorruptIndexException, IOException {
|
||||||
Iterator iter = deleteTerms.entrySet().iterator();
|
Iterator iter = deleteTerms.entrySet().iterator();
|
||||||
|
int delCount = 0;
|
||||||
while (iter.hasNext()) {
|
while (iter.hasNext()) {
|
||||||
Entry entry = (Entry) iter.next();
|
Entry entry = (Entry) iter.next();
|
||||||
Term term = (Term) entry.getKey();
|
Term term = (Term) entry.getKey();
|
||||||
|
|
||||||
TermDocs docs = reader.termDocs(term);
|
TermDocs docs = reader.termDocs(term);
|
||||||
if (docs != null) {
|
if (docs != null) {
|
||||||
int num = ((Num) entry.getValue()).getNum();
|
int num = ((DocumentsWriter.Num) entry.getValue()).getNum();
|
||||||
try {
|
try {
|
||||||
while (docs.next()) {
|
while (docs.next()) {
|
||||||
int doc = docs.doc();
|
int doc = docs.doc();
|
||||||
|
@ -2470,21 +2395,37 @@ public class IndexWriter {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
reader.deleteDocument(doc);
|
reader.deleteDocument(doc);
|
||||||
|
delCount++;
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
docs.close();
|
docs.close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return delCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Apply buffered delete terms to this reader.
|
// Apply buffered delete terms to this reader.
|
||||||
private final void applyDeletes(HashMap deleteTerms, IndexReader reader)
|
private final int applyDeletes(HashMap deleteTerms, IndexReader reader)
|
||||||
throws CorruptIndexException, IOException {
|
throws CorruptIndexException, IOException {
|
||||||
Iterator iter = deleteTerms.entrySet().iterator();
|
Iterator iter = deleteTerms.entrySet().iterator();
|
||||||
|
int delCount = 0;
|
||||||
while (iter.hasNext()) {
|
while (iter.hasNext()) {
|
||||||
Entry entry = (Entry) iter.next();
|
Entry entry = (Entry) iter.next();
|
||||||
reader.deleteDocuments((Term) entry.getKey());
|
delCount += reader.deleteDocuments((Term) entry.getKey());
|
||||||
}
|
}
|
||||||
|
return delCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
public synchronized String segString() {
|
||||||
|
StringBuffer buffer = new StringBuffer();
|
||||||
|
for(int i = 0; i < segmentInfos.size(); i++) {
|
||||||
|
if (i > 0) {
|
||||||
|
buffer.append(' ');
|
||||||
|
}
|
||||||
|
buffer.append(segmentInfos.info(i).name + ":" + segmentInfos.info(i).docCount);
|
||||||
|
}
|
||||||
|
|
||||||
|
return buffer.toString();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,184 @@
|
||||||
|
package org.apache.lucene.index;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copyright 2004 The Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.util.*;
|
||||||
|
import org.apache.lucene.store.*;
|
||||||
|
import org.apache.lucene.document.*;
|
||||||
|
import org.apache.lucene.analysis.*;
|
||||||
|
import org.apache.lucene.index.*;
|
||||||
|
import org.apache.lucene.search.*;
|
||||||
|
import org.apache.lucene.queryParser.*;
|
||||||
|
import org.apache.lucene.util._TestUtil;
|
||||||
|
|
||||||
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
|
import java.util.Random;
|
||||||
|
import java.io.File;
|
||||||
|
|
||||||
|
public class TestAtomicUpdate extends TestCase {
|
||||||
|
private static final Analyzer ANALYZER = new SimpleAnalyzer();
|
||||||
|
private static final Random RANDOM = new Random();
|
||||||
|
|
||||||
|
private static abstract class TimedThread extends Thread {
|
||||||
|
boolean failed;
|
||||||
|
int count;
|
||||||
|
private static int RUN_TIME_SEC = 3;
|
||||||
|
private TimedThread[] allThreads;
|
||||||
|
|
||||||
|
abstract public void doWork() throws Throwable;
|
||||||
|
|
||||||
|
TimedThread(TimedThread[] threads) {
|
||||||
|
this.allThreads = threads;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void run() {
|
||||||
|
final long stopTime = System.currentTimeMillis() + 1000*RUN_TIME_SEC;
|
||||||
|
|
||||||
|
count = 0;
|
||||||
|
|
||||||
|
try {
|
||||||
|
while(System.currentTimeMillis() < stopTime && !anyErrors()) {
|
||||||
|
doWork();
|
||||||
|
count++;
|
||||||
|
}
|
||||||
|
} catch (Throwable e) {
|
||||||
|
e.printStackTrace(System.out);
|
||||||
|
failed = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean anyErrors() {
|
||||||
|
for(int i=0;i<allThreads.length;i++)
|
||||||
|
if (allThreads[i] != null && allThreads[i].failed)
|
||||||
|
return true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static class IndexerThread extends TimedThread {
|
||||||
|
IndexWriter writer;
|
||||||
|
public int count;
|
||||||
|
|
||||||
|
public IndexerThread(IndexWriter writer, TimedThread[] threads) {
|
||||||
|
super(threads);
|
||||||
|
this.writer = writer;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void doWork() throws Exception {
|
||||||
|
// Update all 100 docs...
|
||||||
|
for(int i=0; i<100; i++) {
|
||||||
|
Document d = new Document();
|
||||||
|
int n = RANDOM.nextInt();
|
||||||
|
d.add(new Field("id", Integer.toString(i), Field.Store.YES, Field.Index.UN_TOKENIZED));
|
||||||
|
d.add(new Field("contents", English.intToEnglish(i+10*count), Field.Store.NO, Field.Index.TOKENIZED));
|
||||||
|
writer.updateDocument(new Term("id", Integer.toString(i)), d);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static class SearcherThread extends TimedThread {
|
||||||
|
private Directory directory;
|
||||||
|
|
||||||
|
public SearcherThread(Directory directory, TimedThread[] threads) {
|
||||||
|
super(threads);
|
||||||
|
this.directory = directory;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void doWork() throws Throwable {
|
||||||
|
IndexReader r = IndexReader.open(directory);
|
||||||
|
try {
|
||||||
|
assertEquals(100, r.numDocs());
|
||||||
|
} catch (Throwable t) {
|
||||||
|
throw t;
|
||||||
|
}
|
||||||
|
r.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
Run one indexer and 2 searchers against single index as
|
||||||
|
stress test.
|
||||||
|
*/
|
||||||
|
public void runTest(Directory directory) throws Exception {
|
||||||
|
|
||||||
|
TimedThread[] threads = new TimedThread[4];
|
||||||
|
|
||||||
|
IndexWriter writer = new IndexWriter(directory, ANALYZER, true);
|
||||||
|
|
||||||
|
// Establish a base index of 100 docs:
|
||||||
|
for(int i=0;i<100;i++) {
|
||||||
|
Document d = new Document();
|
||||||
|
d.add(new Field("id", Integer.toString(i), Field.Store.YES, Field.Index.UN_TOKENIZED));
|
||||||
|
d.add(new Field("contents", English.intToEnglish(i), Field.Store.NO, Field.Index.TOKENIZED));
|
||||||
|
writer.addDocument(d);
|
||||||
|
}
|
||||||
|
|
||||||
|
IndexerThread indexerThread = new IndexerThread(writer, threads);
|
||||||
|
threads[0] = indexerThread;
|
||||||
|
indexerThread.start();
|
||||||
|
|
||||||
|
IndexerThread indexerThread2 = new IndexerThread(writer, threads);
|
||||||
|
threads[1] = indexerThread2;
|
||||||
|
indexerThread2.start();
|
||||||
|
|
||||||
|
SearcherThread searcherThread1 = new SearcherThread(directory, threads);
|
||||||
|
threads[2] = searcherThread1;
|
||||||
|
searcherThread1.start();
|
||||||
|
|
||||||
|
SearcherThread searcherThread2 = new SearcherThread(directory, threads);
|
||||||
|
threads[3] = searcherThread2;
|
||||||
|
searcherThread2.start();
|
||||||
|
|
||||||
|
indexerThread.join();
|
||||||
|
indexerThread2.join();
|
||||||
|
searcherThread1.join();
|
||||||
|
searcherThread2.join();
|
||||||
|
|
||||||
|
writer.close();
|
||||||
|
|
||||||
|
assertTrue("hit unexpected exception in indexer", !indexerThread.failed);
|
||||||
|
assertTrue("hit unexpected exception in indexer2", !indexerThread2.failed);
|
||||||
|
assertTrue("hit unexpected exception in search1", !searcherThread1.failed);
|
||||||
|
assertTrue("hit unexpected exception in search2", !searcherThread2.failed);
|
||||||
|
//System.out.println(" Writer: " + indexerThread.count + " iterations");
|
||||||
|
//System.out.println("Searcher 1: " + searcherThread1.count + " searchers created");
|
||||||
|
//System.out.println("Searcher 2: " + searcherThread2.count + " searchers created");
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
Run above stress test against RAMDirectory and then
|
||||||
|
FSDirectory.
|
||||||
|
*/
|
||||||
|
public void testAtomicUpdates() throws Exception {
|
||||||
|
|
||||||
|
Directory directory;
|
||||||
|
|
||||||
|
// First in a RAM directory:
|
||||||
|
directory = new MockRAMDirectory();
|
||||||
|
runTest(directory);
|
||||||
|
directory.close();
|
||||||
|
|
||||||
|
// Second in an FSDirectory:
|
||||||
|
String tempDir = System.getProperty("java.io.tmpdir");
|
||||||
|
File dirPath = new File(tempDir, "lucene.test.atomic");
|
||||||
|
directory = FSDirectory.getDirectory(dirPath);
|
||||||
|
runTest(directory);
|
||||||
|
directory.close();
|
||||||
|
_TestUtil.rmDir(dirPath);
|
||||||
|
}
|
||||||
|
}
|
|
@ -436,6 +436,7 @@ public class TestIndexWriterDelete extends TestCase {
|
||||||
catch (IOException e) {
|
catch (IOException e) {
|
||||||
if (debug) {
|
if (debug) {
|
||||||
System.out.println(" hit IOException: " + e);
|
System.out.println(" hit IOException: " + e);
|
||||||
|
e.printStackTrace(System.out);
|
||||||
}
|
}
|
||||||
err = e;
|
err = e;
|
||||||
if (1 == x) {
|
if (1 == x) {
|
||||||
|
@ -503,10 +504,20 @@ public class TestIndexWriterDelete extends TestCase {
|
||||||
}
|
}
|
||||||
int result2 = hits.length();
|
int result2 = hits.length();
|
||||||
if (success) {
|
if (success) {
|
||||||
if (result2 != END_COUNT) {
|
if (x == 0 && result2 != END_COUNT) {
|
||||||
fail(testName
|
fail(testName
|
||||||
+ ": method did not throw exception but hits.length for search on term 'aaa' is "
|
+ ": method did not throw exception but hits.length for search on term 'aaa' is "
|
||||||
+ result2 + " instead of expected " + END_COUNT);
|
+ result2 + " instead of expected " + END_COUNT);
|
||||||
|
} else if (x == 1 && result2 != START_COUNT && result2 != END_COUNT) {
|
||||||
|
// It's possible that the first exception was
|
||||||
|
// "recoverable" wrt pending deletes, in which
|
||||||
|
// case the pending deletes are retained and
|
||||||
|
// then re-flushing (with plenty of disk
|
||||||
|
// space) will succeed in flushing the
|
||||||
|
// deletes:
|
||||||
|
fail(testName
|
||||||
|
+ ": method did not throw exception but hits.length for search on term 'aaa' is "
|
||||||
|
+ result2 + " instead of expected " + START_COUNT + " or " + END_COUNT);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// On hitting exception we still may have added
|
// On hitting exception we still may have added
|
||||||
|
@ -515,7 +526,7 @@ public class TestIndexWriterDelete extends TestCase {
|
||||||
err.printStackTrace();
|
err.printStackTrace();
|
||||||
fail(testName
|
fail(testName
|
||||||
+ ": method did throw exception but hits.length for search on term 'aaa' is "
|
+ ": method did throw exception but hits.length for search on term 'aaa' is "
|
||||||
+ result2 + " instead of expected " + START_COUNT);
|
+ result2 + " instead of expected " + START_COUNT + " or " + END_COUNT);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -535,10 +546,8 @@ public class TestIndexWriterDelete extends TestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// This test tests that buffered deletes are not lost due to i/o
|
// This test tests that buffered deletes are cleared when
|
||||||
// errors occurring after the buffered deletes have been flushed but
|
// an Exception is hit during flush.
|
||||||
// before the segmentInfos have been successfully written
|
|
||||||
|
|
||||||
public void testErrorAfterApplyDeletes() throws IOException {
|
public void testErrorAfterApplyDeletes() throws IOException {
|
||||||
|
|
||||||
MockRAMDirectory.Failure failure = new MockRAMDirectory.Failure() {
|
MockRAMDirectory.Failure failure = new MockRAMDirectory.Failure() {
|
||||||
|
@ -662,9 +671,11 @@ public class TestIndexWriterDelete extends TestCase {
|
||||||
|
|
||||||
hitCount = getHitCount(dir, term);
|
hitCount = getHitCount(dir, term);
|
||||||
|
|
||||||
// If we haven't lost the delete the hit count will be zero
|
// If the delete was not cleared then hit count will
|
||||||
|
// be 0. With autoCommit=false, we hit the exception
|
||||||
assertEquals(0, hitCount);
|
// on creating the compound file, so the delete was
|
||||||
|
// flushed successfully.
|
||||||
|
assertEquals(autoCommit ? 1:0, hitCount);
|
||||||
|
|
||||||
if (autoCommit) {
|
if (autoCommit) {
|
||||||
modifier.close();
|
modifier.close();
|
||||||
|
|
Loading…
Reference in New Issue