Run merge-on-full-flush even though no changes got flushed. (#12549)

Currently, merge-on-full-flush only checks whether merges need to run when
changes have been flushed to disk. This prevents having different merging
logic for refreshes and commits, since the merge policy would not be
consulted upon commit if no new documents were indexed since the previous
refresh.
This commit is contained in:
Adrien Grand 2023-10-24 16:57:05 +02:00
parent 779592771a
commit 3d4082ca3d
7 changed files with 21 additions and 17 deletions

View File

@ -3680,7 +3680,7 @@ public class IndexWriter
// merge completes which would otherwise have
// removed the files we are now syncing.
deleter.incRef(toCommit.files(false));
if (anyChanges && maxCommitMergeWaitMillis > 0) {
if (maxCommitMergeWaitMillis > 0) {
// we can safely call preparePointInTimeMerge since writeReaderPool(true) above
// wrote all
// necessary files to disk and checkpointed them.
@ -4232,7 +4232,6 @@ public class IndexWriter
flushSuccess = true;
} finally {
assert Thread.holdsLock(fullFlushLock);
;
docWriter.finishFullFlush(flushSuccess);
processEvents(false);
}

View File

@ -654,7 +654,8 @@ public class TestDirectoryReaderReopen extends LuceneTestCase {
public void testOverDecRefDuringReopen() throws Exception {
MockDirectoryWrapper dir = newMockDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
IndexWriterConfig iwc =
new IndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.INSTANCE);
iwc.setCodec(TestUtil.getDefaultCodec());
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();

View File

@ -519,11 +519,10 @@ public class TestIndexWriter extends LuceneTestCase {
doc.add(newField("field", "aaa", customType));
for (int i = 0; i < 19; i++) writer.addDocument(doc);
writer.flush(false, true);
writer.close();
SegmentInfos sis = SegmentInfos.readLatestCommit(dir);
// Since we flushed w/o allowing merging we should now
// have 10 segments
assertEquals(10, sis.size());
assertEquals(10, writer.getSegmentCount());
writer.close();
dir.close();
}

View File

@ -1314,7 +1314,7 @@ public class TestIndexWriterDelete extends LuceneTestCase {
w.addDocument(doc);
w.close();
iwc = new IndexWriterConfig(new MockAnalyzer(random()));
iwc = new IndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.INSTANCE);
iwc.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
w = new IndexWriter(d, iwc);
IndexReader r = DirectoryReader.open(w, false, false);

View File

@ -394,14 +394,13 @@ public class TestIndexWriterMergePolicy extends LuceneTestCase {
.setMaxFullFlushMergeWaitMillis(Integer.MAX_VALUE);
IndexWriter writerWithMergePolicy = new IndexWriter(dir, iwc);
writerWithMergePolicy.commit(); // No changes. Commit doesn't trigger a merge.
// No changes. Refresh doesn't trigger a merge.
DirectoryReader unmergedReader = DirectoryReader.open(writerWithMergePolicy);
assertEquals(5, unmergedReader.leaves().size());
unmergedReader.close();
TestIndexWriter.addDoc(writerWithMergePolicy);
writerWithMergePolicy.commit(); // Doc added, do merge on commit.
writerWithMergePolicy.commit(); // Do merge on commit.
assertEquals(1, writerWithMergePolicy.getSegmentCount()); //
DirectoryReader mergedReader = DirectoryReader.open(writerWithMergePolicy);
@ -410,8 +409,8 @@ public class TestIndexWriterMergePolicy extends LuceneTestCase {
try (IndexReader reader = DirectoryReader.open(writerWithMergePolicy)) {
IndexSearcher searcher = new IndexSearcher(reader);
assertEquals(6, reader.numDocs());
assertEquals(6, searcher.count(new MatchAllDocsQuery()));
assertEquals(5, reader.numDocs());
assertEquals(5, searcher.count(new MatchAllDocsQuery()));
}
writerWithMergePolicy.close();
@ -445,16 +444,14 @@ public class TestIndexWriterMergePolicy extends LuceneTestCase {
.setIndexWriterEventListener(eventListener);
IndexWriter writerWithMergePolicy = new IndexWriter(dir, iwc);
writerWithMergePolicy.commit(); // No changes. Commit doesn't trigger a merge.
// No changes. Refresh doesn't trigger a merge.
DirectoryReader unmergedReader = DirectoryReader.open(writerWithMergePolicy);
assertEquals(5, unmergedReader.leaves().size());
unmergedReader.close();
TestIndexWriter.addDoc(writerWithMergePolicy);
assertFalse(eventListener.isEventsRecorded());
writerWithMergePolicy.commit(); // Doc added, do merge on commit.
writerWithMergePolicy.commit(); // Do merge on commit.
assertEquals(1, writerWithMergePolicy.getSegmentCount()); //
assertTrue(eventListener.isEventsRecorded());

View File

@ -263,6 +263,7 @@ public class TestPendingSoftDeletes extends TestPendingDeletes {
newIndexWriterConfig()
.setSoftDeletesField("_soft_deletes")
.setMaxBufferedDocs(3) // make sure we write one segment
.setMergePolicy(NoMergePolicy.INSTANCE) // prevent deletes from triggering merges
.setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH));
Document doc = new Document();
doc.add(new StringField("id", "1", Field.Store.YES));
@ -331,6 +332,7 @@ public class TestPendingSoftDeletes extends TestPendingDeletes {
newIndexWriterConfig()
.setSoftDeletesField("_soft_deletes")
.setMaxBufferedDocs(3) // make sure we write one segment
.setMergePolicy(NoMergePolicy.INSTANCE) // prevent deletes from triggering merges
.setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH));
Document doc = new Document();
doc.add(new StringField("id", "1", Field.Store.YES));

View File

@ -46,6 +46,7 @@ import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.NoDeletionPolicy;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.index.SerialMergeScheduler;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory;
@ -970,7 +971,12 @@ public class MockDirectoryWrapper extends BaseDirectoryWrapper {
DirectoryReader ir1 = DirectoryReader.open(this);
int numDocs1 = ir1.numDocs();
ir1.close();
new IndexWriter(this, new IndexWriterConfig(null)).close();
// Use a serial merge scheduler, otherwise merges may be scheduled on a different thread
// and will deadlock since the lock on `this` is already taken by close(), so
// createOutput() won't be able to take it.
new IndexWriter(
this, new IndexWriterConfig(null).setMergeScheduler(new SerialMergeScheduler()))
.close();
DirectoryReader ir2 = DirectoryReader.open(this);
int numDocs2 = ir2.numDocs();
ir2.close();