diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 4a5f7ff9610..00aaf861501 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -189,6 +189,9 @@ Bug fixes * LUCENE-9365: FuzzyQuery was missing matches when prefix length was equal to the term length (Mark Harwood, Mike Drob) +* LUCENE-9617: Fix per-field memory leak in IndexWriter.deleteAll(). Reset next available internal + field number to 0 on FieldInfos.clear(), to avoid wasting FieldInfo references. (Michael Froh) + Other * LUCENE-9631: Properly override slice() on subclasses of OffsetRange. (Dawid Weiss) diff --git a/lucene/core/src/java/org/apache/lucene/index/FieldInfos.java b/lucene/core/src/java/org/apache/lucene/index/FieldInfos.java index 95755ff2d87..9d081a2fde6 100644 --- a/lucene/core/src/java/org/apache/lucene/index/FieldInfos.java +++ b/lucene/core/src/java/org/apache/lucene/index/FieldInfos.java @@ -481,6 +481,7 @@ public class FieldInfos implements Iterable { indexOptions.clear(); docValuesType.clear(); dimensions.clear(); + lowestUnassignedFieldNumber = -1; } synchronized void setIndexOptions(int number, String name, IndexOptions indexOptions) { diff --git a/lucene/core/src/test/org/apache/lucene/index/TestFieldInfos.java b/lucene/core/src/test/org/apache/lucene/index/TestFieldInfos.java index 321fcf7dc68..735d1c393ff 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestFieldInfos.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestFieldInfos.java @@ -187,4 +187,23 @@ public class TestFieldInfos extends LuceneTestCase { writer.close(); dir.close(); } + + public void testFieldNumbersAutoIncrement() { + FieldInfos.FieldNumbers fieldNumbers = new FieldInfos.FieldNumbers("softDeletes"); + for (int i = 0; i < 10; i++) { + fieldNumbers.addOrGet("field" + i, -1, IndexOptions.NONE, DocValuesType.NONE, + 0, 0, 0, 0, + VectorValues.SearchStrategy.NONE, false); + } + int idx = fieldNumbers.addOrGet("EleventhField", -1, IndexOptions.NONE, DocValuesType.NONE, + 0, 0, 0, 0, + VectorValues.SearchStrategy.NONE, false); + assertEquals("Field numbers 0 through 9 were allocated", 10, idx); + + fieldNumbers.clear(); + idx = fieldNumbers.addOrGet("PostClearField", -1, IndexOptions.NONE, DocValuesType.NONE, + 0, 0, 0, 0, + VectorValues.SearchStrategy.NONE, false); + assertEquals("Field numbers should reset after clear()", 0, idx); + } } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterDelete.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterDelete.java index 1f9781ac6e0..69bf706af00 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterDelete.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterDelete.java @@ -20,6 +20,7 @@ package org.apache.lucene.index; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.PrintStream; +import java.io.UncheckedIOException; import java.util.ArrayList; import java.util.Collections; import java.util.List; @@ -28,6 +29,7 @@ import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockAnalyzer; @@ -36,6 +38,7 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.document.StoredField; import org.apache.lucene.document.StringField; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.ScoreDoc; @@ -355,17 +358,17 @@ public class TestIndexWriterDelete extends LuceneTestCase { if (VERBOSE) { System.out.println("\nTEST: now final deleteAll"); } - + modifier.deleteAll(); for (Thread thread : threads) { thread.join(); } - + if (VERBOSE) { System.out.println("\nTEST: now close"); } modifier.close(); - + DirectoryReader reader = DirectoryReader.open(dir); if (VERBOSE) { System.out.println("\nTEST: got reader=" + reader); @@ -453,6 +456,50 @@ public class TestIndexWriterDelete extends LuceneTestCase { dir.close(); } + // Verify that we can call deleteAll repeatedly without leaking field numbers such that we trigger OOME + // on creation of FieldInfos. See https://issues.apache.org/jira/browse/LUCENE-9617 + @Nightly // Takes 1-2 minutes to run on a 16-core machine + public void testDeleteAllRepeated() throws IOException, InterruptedException { + final int breakingFieldCount = 50_000_000; + try (Directory dir = newDirectory()) { + // Avoid flushing until the end of the test to save time. + IndexWriterConfig conf = newIndexWriterConfig() + .setMaxBufferedDocs(1000) + .setRAMBufferSizeMB(1000) + .setRAMPerThreadHardLimitMB(1000) + .setCheckPendingFlushUpdate(false); + try (IndexWriter modifier = new IndexWriter(dir, conf)) { + Document document = new Document(); + int fieldsPerDoc = 1_000; + for (int i = 0; i < fieldsPerDoc; i++) { + document.add(new StoredField("field" + i, "")); + } + AtomicLong numFields = new AtomicLong(0); + List threads = new ArrayList<>(); + int nThreads = atLeast(8); + for (int i = 0; i < nThreads; i++) { + Thread t = new Thread(() -> { + try { + while (numFields.getAndAdd(fieldsPerDoc) < breakingFieldCount) { + modifier.addDocument(document); + modifier.deleteAll(); + } + } catch (IOException e) { + throw new UncheckedIOException(e); + } + }); + t.start(); + threads.add(t); + } + for (Thread t : threads) { + t.join(); + } + // Add one last document and flush to build FieldInfos. + modifier.addDocument(document); + modifier.flush(); + } + } + } private void updateDoc(IndexWriter modifier, int id, int value) throws IOException { @@ -944,7 +991,7 @@ public class TestIndexWriterDelete extends LuceneTestCase { modifier.close(); dir.close(); } - + public void testDeleteAllSlowly() throws Exception { final Directory dir = newDirectory(); RandomIndexWriter w = new RandomIndexWriter(random(), dir); @@ -982,7 +1029,7 @@ public class TestIndexWriterDelete extends LuceneTestCase { w.close(); dir.close(); } - + // TODO: this test can hit pathological cases (IW settings?) where it runs for far too long @Nightly public void testIndexingThenDeleting() throws Exception {