mirror of https://github.com/apache/lucene.git
LUCENE-9617: Reset lowestUnassignedFieldNumber in FieldNumbers.clear() (#2088)
* LUCENE-9617: Reset lowestUnassignedFieldNumber in FieldNumbers.clear(). FieldNumbers.clear() is called from IndexWriter.deleteAll(), which is supposed to completely reset the state of the index. This includes clearing all known fields. Prior to this change, FieldNumbers would keep allocating progressively higher field numbers after each clear(), which resulted in larger and larger arrays for FieldInfos.byNumber, effectively "leaking" field numbers every time deleteAll() was called. Co-authored-by: Michael Froh <froh@amazon.com>
This commit is contained in:
parent
734c533cca
commit
8e162e2670
|
@ -189,6 +189,9 @@ Bug fixes
|
||||||
* LUCENE-9365: FuzzyQuery was missing matches when prefix length was equal to the term length
|
* LUCENE-9365: FuzzyQuery was missing matches when prefix length was equal to the term length
|
||||||
(Mark Harwood, Mike Drob)
|
(Mark Harwood, Mike Drob)
|
||||||
|
|
||||||
|
* LUCENE-9617: Fix per-field memory leak in IndexWriter.deleteAll(). Reset next available internal
|
||||||
|
field number to 0 on FieldInfos.FieldNumbers.clear(), to avoid wasting FieldInfo references. (Michael Froh)
|
||||||
|
|
||||||
Other
|
Other
|
||||||
|
|
||||||
* LUCENE-9631: Properly override slice() on subclasses of OffsetRange. (Dawid Weiss)
|
* LUCENE-9631: Properly override slice() on subclasses of OffsetRange. (Dawid Weiss)
|
||||||
|
|
|
@ -481,6 +481,7 @@ public class FieldInfos implements Iterable<FieldInfo> {
|
||||||
indexOptions.clear();
|
indexOptions.clear();
|
||||||
docValuesType.clear();
|
docValuesType.clear();
|
||||||
dimensions.clear();
|
dimensions.clear();
|
||||||
|
lowestUnassignedFieldNumber = -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
synchronized void setIndexOptions(int number, String name, IndexOptions indexOptions) {
|
synchronized void setIndexOptions(int number, String name, IndexOptions indexOptions) {
|
||||||
|
|
|
@ -187,4 +187,23 @@ public class TestFieldInfos extends LuceneTestCase {
|
||||||
writer.close();
|
writer.close();
|
||||||
dir.close();
|
dir.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Checks that {@code FieldInfos.FieldNumbers} hands out consecutive field numbers and that
 * {@code clear()} restarts the numbering: ten fields are registered, an eleventh is expected to
 * receive number 10, and the first field registered after {@code clear()} is expected to
 * receive number 0 again.
 */
public void testFieldNumbersAutoIncrement() {
|
||||||
|
// NOTE(review): the constructor argument is presumably the soft-deletes field name - confirm.
FieldInfos.FieldNumbers fieldNumbers = new FieldInfos.FieldNumbers("softDeletes");
|
||||||
|
// Register "field0" .. "field9". The -1 second argument presumably means "no preferred
// number, pick the next free one" - confirm against FieldNumbers.addOrGet.
for (int i = 0; i < 10; i++) {
|
||||||
|
fieldNumbers.addOrGet("field" + i, -1, IndexOptions.NONE, DocValuesType.NONE,
|
||||||
|
0, 0, 0, 0,
|
||||||
|
VectorValues.SearchStrategy.NONE, false);
|
||||||
|
}
|
||||||
|
// The eleventh distinct field name must be assigned the next number in sequence.
int idx = fieldNumbers.addOrGet("EleventhField", -1, IndexOptions.NONE, DocValuesType.NONE,
|
||||||
|
0, 0, 0, 0,
|
||||||
|
VectorValues.SearchStrategy.NONE, false);
|
||||||
|
assertEquals("Field numbers 0 through 9 were allocated", 10, idx);
|
||||||
|
|
||||||
|
// After clear(), numbering must start over instead of continuing from 11.
fieldNumbers.clear();
|
||||||
|
idx = fieldNumbers.addOrGet("PostClearField", -1, IndexOptions.NONE, DocValuesType.NONE,
|
||||||
|
0, 0, 0, 0,
|
||||||
|
VectorValues.SearchStrategy.NONE, false);
|
||||||
|
assertEquals("Field numbers should reset after clear()", 0, idx);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,6 +20,7 @@ package org.apache.lucene.index;
|
||||||
import java.io.ByteArrayOutputStream;
|
import java.io.ByteArrayOutputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
|
import java.io.UncheckedIOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
@ -28,6 +29,7 @@ import java.util.concurrent.CountDownLatch;
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
import java.util.concurrent.atomic.AtomicBoolean;
|
import java.util.concurrent.atomic.AtomicBoolean;
|
||||||
import java.util.concurrent.atomic.AtomicInteger;
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
|
import java.util.concurrent.atomic.AtomicLong;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.MockAnalyzer;
|
import org.apache.lucene.analysis.MockAnalyzer;
|
||||||
|
@ -36,6 +38,7 @@ import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.document.FieldType;
|
import org.apache.lucene.document.FieldType;
|
||||||
import org.apache.lucene.document.NumericDocValuesField;
|
import org.apache.lucene.document.NumericDocValuesField;
|
||||||
|
import org.apache.lucene.document.StoredField;
|
||||||
import org.apache.lucene.document.StringField;
|
import org.apache.lucene.document.StringField;
|
||||||
import org.apache.lucene.search.IndexSearcher;
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
import org.apache.lucene.search.ScoreDoc;
|
import org.apache.lucene.search.ScoreDoc;
|
||||||
|
@ -355,17 +358,17 @@ public class TestIndexWriterDelete extends LuceneTestCase {
|
||||||
if (VERBOSE) {
|
if (VERBOSE) {
|
||||||
System.out.println("\nTEST: now final deleteAll");
|
System.out.println("\nTEST: now final deleteAll");
|
||||||
}
|
}
|
||||||
|
|
||||||
modifier.deleteAll();
|
modifier.deleteAll();
|
||||||
for (Thread thread : threads) {
|
for (Thread thread : threads) {
|
||||||
thread.join();
|
thread.join();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (VERBOSE) {
|
if (VERBOSE) {
|
||||||
System.out.println("\nTEST: now close");
|
System.out.println("\nTEST: now close");
|
||||||
}
|
}
|
||||||
modifier.close();
|
modifier.close();
|
||||||
|
|
||||||
DirectoryReader reader = DirectoryReader.open(dir);
|
DirectoryReader reader = DirectoryReader.open(dir);
|
||||||
if (VERBOSE) {
|
if (VERBOSE) {
|
||||||
System.out.println("\nTEST: got reader=" + reader);
|
System.out.println("\nTEST: got reader=" + reader);
|
||||||
|
@ -453,6 +456,50 @@ public class TestIndexWriterDelete extends LuceneTestCase {
|
||||||
dir.close();
|
dir.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Verify that we can call deleteAll repeatedly without leaking field numbers such that we trigger OOME
|
||||||
|
// on creation of FieldInfos. See https://issues.apache.org/jira/browse/LUCENE-9617
|
||||||
|
@Nightly // Takes 1-2 minutes to run on a 16-core machine
|
||||||
|
public void testDeleteAllRepeated() throws IOException, InterruptedException {
|
||||||
|
// Worker threads stop looping once the shared numFields counter reaches this value.
final int breakingFieldCount = 50_000_000;
|
||||||
|
try (Directory dir = newDirectory()) {
|
||||||
|
// Avoid flushing until the end of the test to save time.
|
||||||
|
IndexWriterConfig conf = newIndexWriterConfig()
|
||||||
|
.setMaxBufferedDocs(1000)
|
||||||
|
.setRAMBufferSizeMB(1000)
|
||||||
|
.setRAMPerThreadHardLimitMB(1000)
|
||||||
|
.setCheckPendingFlushUpdate(false);
|
||||||
|
try (IndexWriter modifier = new IndexWriter(dir, conf)) {
|
||||||
|
// One document carrying fieldsPerDoc empty stored fields ("field0", "field1", ...);
// the same instance is reused for every addDocument call below.
Document document = new Document();
|
||||||
|
int fieldsPerDoc = 1_000;
|
||||||
|
for (int i = 0; i < fieldsPerDoc; i++) {
|
||||||
|
document.add(new StoredField("field" + i, ""));
|
||||||
|
}
|
||||||
|
// Shared counter advanced by fieldsPerDoc on every loop check, so all workers together
// perform roughly breakingFieldCount / fieldsPerDoc add + deleteAll rounds.
AtomicLong numFields = new AtomicLong(0);
|
||||||
|
List<Thread> threads = new ArrayList<>();
|
||||||
|
int nThreads = atLeast(8);
|
||||||
|
for (int i = 0; i < nThreads; i++) {
|
||||||
|
// Each worker repeatedly adds the document and then deletes everything.
Thread t = new Thread(() -> {
|
||||||
|
try {
|
||||||
|
while (numFields.getAndAdd(fieldsPerDoc) < breakingFieldCount) {
|
||||||
|
modifier.addDocument(document);
|
||||||
|
modifier.deleteAll();
|
||||||
|
}
|
||||||
|
// The Runnable lambda cannot throw a checked IOException, so rethrow it unchecked.
} catch (IOException e) {
|
||||||
|
throw new UncheckedIOException(e);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
t.start();
|
||||||
|
threads.add(t);
|
||||||
|
}
|
||||||
|
// Wait for every worker to finish before the final add and flush.
for (Thread t : threads) {
|
||||||
|
t.join();
|
||||||
|
}
|
||||||
|
// Add one last document and flush to build FieldInfos.
|
||||||
|
modifier.addDocument(document);
|
||||||
|
modifier.flush();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private void updateDoc(IndexWriter modifier, int id, int value)
|
private void updateDoc(IndexWriter modifier, int id, int value)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
|
@ -944,7 +991,7 @@ public class TestIndexWriterDelete extends LuceneTestCase {
|
||||||
modifier.close();
|
modifier.close();
|
||||||
dir.close();
|
dir.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testDeleteAllSlowly() throws Exception {
|
public void testDeleteAllSlowly() throws Exception {
|
||||||
final Directory dir = newDirectory();
|
final Directory dir = newDirectory();
|
||||||
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
|
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
|
||||||
|
@ -982,7 +1029,7 @@ public class TestIndexWriterDelete extends LuceneTestCase {
|
||||||
w.close();
|
w.close();
|
||||||
dir.close();
|
dir.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: this test can hit pathological cases (IW settings?) where it runs for far too long
|
// TODO: this test can hit pathological cases (IW settings?) where it runs for far too long
|
||||||
@Nightly
|
@Nightly
|
||||||
public void testIndexingThenDeleting() throws Exception {
|
public void testIndexingThenDeleting() throws Exception {
|
||||||
|
|
Loading…
Reference in New Issue