mirror of https://github.com/apache/lucene.git
LUCENE-9298: Improve RAM accounting in BufferedUpdates when deleted doc IDs and terms are cleared (#1389)
This commit is contained in:
parent
adbd714b37
commit
2935186c5b
|
@ -255,6 +255,8 @@ Improvements
|
|||
* LUCENE-9171: QueryBuilder can now use BoostAttributes on input token streams to selectively
|
||||
boost particular terms or synonyms in parsed queries. (Alessandro Benedetti, Alan Woodward)
|
||||
|
||||
* LUCENE-9298: Improve RAM accounting in BufferedUpdates when deleted doc IDs and terms are cleared. (Yu Binglei, Simon Willnauer)
|
||||
|
||||
Optimizations
|
||||
---------------------
|
||||
|
||||
|
|
|
@ -80,6 +80,7 @@ class BufferedUpdates implements Accountable {
|
|||
|
||||
// General RAM counter: summed into ramBytesUsed(), reduced by clearDeletedDocIds()
// and zeroed by clear().
private final Counter bytesUsed = Counter.newCounter(true);
|
||||
// RAM counter for field updates: summed into ramBytesUsed() and zeroed by clear().
final Counter fieldUpdatesBytesUsed = Counter.newCounter(true);
|
||||
// RAM counter for buffered delete terms: grown when a new term is buffered, zeroed by
// clearDeleteTerms() and clear(), and summed into ramBytesUsed().
private final Counter termsBytesUsed = Counter.newCounter(true);
|
||||
|
||||
private final static boolean VERBOSE_DELETES = false;
|
||||
|
||||
|
@ -151,7 +152,7 @@ class BufferedUpdates implements Accountable {
|
|||
// is done to respect IndexWriterConfig.setMaxBufferedDeleteTerms.
|
||||
numTermDeletes.incrementAndGet();
|
||||
if (current == null) {
|
||||
bytesUsed.addAndGet(BYTES_PER_DEL_TERM + term.bytes.length + (Character.BYTES * term.field().length()));
|
||||
termsBytesUsed.addAndGet(BYTES_PER_DEL_TERM + term.bytes.length + (Character.BYTES * term.field().length()));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -176,8 +177,9 @@ class BufferedUpdates implements Accountable {
|
|||
}
|
||||
|
||||
void clearDeleteTerms() {
|
||||
deleteTerms.clear();
|
||||
numTermDeletes.set(0);
|
||||
termsBytesUsed.addAndGet(-termsBytesUsed.get());
|
||||
deleteTerms.clear();
|
||||
}
|
||||
|
||||
void clear() {
|
||||
|
@ -189,6 +191,7 @@ class BufferedUpdates implements Accountable {
|
|||
fieldUpdates.clear();
|
||||
bytesUsed.addAndGet(-bytesUsed.get());
|
||||
fieldUpdatesBytesUsed.addAndGet(-fieldUpdatesBytesUsed.get());
|
||||
termsBytesUsed.addAndGet(-termsBytesUsed.get());
|
||||
}
|
||||
|
||||
boolean any() {
|
||||
|
@ -197,11 +200,11 @@ class BufferedUpdates implements Accountable {
|
|||
|
||||
@Override
|
||||
public long ramBytesUsed() {
|
||||
return bytesUsed.get() + fieldUpdatesBytesUsed.get();
|
||||
return bytesUsed.get() + fieldUpdatesBytesUsed.get() + termsBytesUsed.get();
|
||||
}
|
||||
|
||||
void clearDeletedDocIds() {
|
||||
deleteDocIDs.clear();
|
||||
bytesUsed.addAndGet(-deleteDocIDs.size() * BufferedUpdates.BYTES_PER_DEL_DOCID);
|
||||
deleteDocIDs.clear();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,71 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.index;
|
||||
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
import java.util.stream.IntStream;
|
||||
|
||||
/**
|
||||
* Unit test for {@link BufferedUpdates}
|
||||
*/
|
||||
public class TestBufferedUpdates extends LuceneTestCase {
|
||||
/**
|
||||
* return a term that maybe duplicated with pre
|
||||
*/
|
||||
private static Term mayDuplicate(int bound) {
|
||||
boolean shouldDuplicated = bound > 3 && random().nextBoolean();
|
||||
if (shouldDuplicated) {
|
||||
return new Term("myField", String.valueOf(random().nextInt(bound)));
|
||||
}
|
||||
return new Term("myField", String.valueOf(bound));
|
||||
}
|
||||
|
||||
public void testRamBytesUsed() {
|
||||
BufferedUpdates bu = new BufferedUpdates("seg1");
|
||||
assertEquals(bu.ramBytesUsed(), 0L);
|
||||
assertFalse(bu.any());
|
||||
IntStream.range(0, random().nextInt(atLeast(200))).forEach(id -> {
|
||||
int reminder = random().nextInt(3);
|
||||
if (reminder == 0) {
|
||||
bu.addDocID(id);
|
||||
} else if (reminder == 1) {
|
||||
bu.addQuery(new TermQuery(mayDuplicate(id)), id);
|
||||
} else if (reminder == 2) {
|
||||
bu.addTerm((mayDuplicate(id)), id);
|
||||
}
|
||||
});
|
||||
assertTrue("we have added tons of docIds, terms and queries", bu.any());
|
||||
|
||||
long totalUsed = bu.ramBytesUsed();
|
||||
assertTrue(totalUsed > 0);
|
||||
|
||||
bu.clearDeletedDocIds();
|
||||
assertTrue("only docIds are cleaned, buffer shouldn't be empty", bu.any());
|
||||
assertTrue("docIds are cleaned, ram in used should decrease", totalUsed > bu.ramBytesUsed());
|
||||
totalUsed = bu.ramBytesUsed();
|
||||
|
||||
bu.clearDeleteTerms();
|
||||
assertTrue("only terms and docIds are cleaned, the queries are still in memory", bu.any());
|
||||
assertTrue("terms are cleaned, ram in used should decrease", totalUsed > bu.ramBytesUsed());
|
||||
|
||||
bu.clear();
|
||||
assertFalse(bu.any());
|
||||
assertEquals(bu.ramBytesUsed(), 0L);
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue