mirror of https://github.com/apache/lucene.git
Ensure caching all leaves from the upper tier (#12147)
This change adjusts the cache policy to ensure that all segments in the max tier are cached. Before, we cached segments that held more than 3% of the total documents in the index; now we cache segments that hold more than half of the average number of documents per leaf of the index. Closes #12140
This commit is contained in:
parent 54044a82a0
commit 8e15c665be
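In short, the new policy keeps the absolute 10k-document floor but replaces the fixed 3%-of-the-index ratio with a comparison against the average segment size. A minimal standalone sketch of the criterion (class and method names here are illustrative, not from the commit; the actual logic is LRUQueryCache.MinSegmentSizePredicate in the diff below):

class CachePolicySketch {
  // A leaf qualifies when it clears an absolute floor (10k docs by default)
  // and holds more than half of the average number of documents per leaf.
  static boolean cacheable(int leafMaxDoc, int indexMaxDoc, int leafCount, int minSize) {
    if (leafMaxDoc < minSize) {
      return false; // below the absolute size floor
    }
    int averageDocsPerLeaf = indexMaxDoc / leafCount;
    return leafMaxDoc * 2 > averageDocsPerLeaf;
  }

  public static void main(String[] args) {
    // A 1M-doc index spread over 10 leaves averages 100k docs per leaf,
    // so only leaves with more than 50k docs qualify.
    System.out.println(cacheable(60_000, 1_000_000, 10, 10_000)); // true
    System.out.println(cacheable(40_000, 1_000_000, 10, 10_000)); // false
  }
}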
LRUQueryCache.java

@@ -139,25 +139,21 @@ public class LRUQueryCache implements QueryCache, Accountable {
   /**
    * Create a new instance that will cache at most <code>maxSize</code> queries with at most <code>
    * maxRamBytesUsed</code> bytes of memory. Queries will only be cached on leaves that have more
-   * than 10k documents and have more than 3% of the total number of documents in the index. This
-   * should guarantee that all leaves from the upper {@link TieredMergePolicy tier} will be cached
-   * while ensuring that at most <code>33</code> leaves can make it to the cache (very likely less
-   * than 10 in practice), which is useful for this implementation since some operations perform in
-   * linear time with the number of cached leaves. Only clauses whose cost is at most 100x the cost
-   * of the top-level query will be cached in order to not hurt latency too much because of caching.
+   * than 10k documents and have more than half of the average documents per leaf of the index.
+   * This should guarantee that all leaves from the upper {@link TieredMergePolicy tier} will be
+   * cached. Only clauses whose cost is at most 100x the cost of the top-level query will be cached
+   * in order to not hurt latency too much because of caching.
    */
   public LRUQueryCache(int maxSize, long maxRamBytesUsed) {
-    this(maxSize, maxRamBytesUsed, new MinSegmentSizePredicate(10000, .03f), 10);
+    this(maxSize, maxRamBytesUsed, new MinSegmentSizePredicate(10000), 10);
   }

   // pkg-private for testing
   static class MinSegmentSizePredicate implements Predicate<LeafReaderContext> {
     private final int minSize;
-    private final float minSizeRatio;

-    MinSegmentSizePredicate(int minSize, float minSizeRatio) {
+    MinSegmentSizePredicate(int minSize) {
       this.minSize = minSize;
-      this.minSizeRatio = minSizeRatio;
     }

     @Override
@@ -167,8 +163,9 @@ public class LRUQueryCache implements QueryCache, Accountable {
         return false;
       }
       final IndexReaderContext topLevelContext = ReaderUtil.getTopLevelContext(context);
-      final float sizeRatio = (float) context.reader().maxDoc() / topLevelContext.reader().maxDoc();
-      return sizeRatio >= minSizeRatio;
+      final int averageTotalDocs =
+          topLevelContext.reader().maxDoc() / topLevelContext.leaves().size();
+      return maxDoc * 2 > averageTotalDocs;
     }
   }
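To see why the old ratio test could skip upper-tier leaves while the new average test cannot, consider an index of 1M documents spread over 40 equally sized segments (a purely illustrative sketch; the numbers are not from the commit):

class RatioVsAverageSketch {
  public static void main(String[] args) {
    int leafMaxDoc = 25_000, indexMaxDoc = 1_000_000, leafCount = 40;
    // Old policy: cache when the leaf holds at least 3% of all documents.
    float sizeRatio = (float) leafMaxDoc / indexMaxDoc; // 0.025 -> nothing was cached
    // New policy: cache when the leaf holds more than half the average.
    int averageDocsPerLeaf = indexMaxDoc / leafCount; // 25_000
    boolean cachedNow = leafMaxDoc * 2 > averageDocsPerLeaf; // 50_000 > 25_000 -> cached
    System.out.println(sizeRatio + " / " + cachedNow);
  }
}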
TestLRUQueryCache.java

@@ -20,8 +20,10 @@ import static org.apache.lucene.util.RamUsageEstimator.HASHTABLE_RAM_BYTES_PER_ENTRY;
 import static org.apache.lucene.util.RamUsageEstimator.LINKED_HASHTABLE_RAM_BYTES_PER_ENTRY;
 import static org.apache.lucene.util.RamUsageEstimator.QUERY_DEFAULT_RAM_BYTES_USED;
 
+import com.carrotsearch.randomizedtesting.generators.RandomNumbers;
 import com.carrotsearch.randomizedtesting.generators.RandomPicks;
 import java.io.IOException;
+import java.io.UncheckedIOException;
 import java.lang.reflect.Field;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -39,6 +41,7 @@ import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicLong;
 import java.util.concurrent.atomic.AtomicReference;
+import java.util.function.IntConsumer;
 import java.util.stream.Collectors;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field.Store;
@@ -1387,46 +1390,45 @@ public class TestLRUQueryCache extends LuceneTestCase {
 
   public void testMinSegmentSizePredicate() throws IOException {
     Directory dir = newDirectory();
-    IndexWriterConfig iwc = newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE);
-    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
-    w.addDocument(new Document());
-    DirectoryReader reader = w.getReader();
-    IndexSearcher searcher = newSearcher(reader);
-    searcher.setQueryCachingPolicy(ALWAYS_CACHE);
-
-    LRUQueryCache cache =
-        new LRUQueryCache(
-            2, 10000, new LRUQueryCache.MinSegmentSizePredicate(2, 0f), Float.POSITIVE_INFINITY);
-    searcher.setQueryCache(cache);
-    searcher.count(new DummyQuery());
-    assertEquals(0, cache.getCacheCount());
-
-    cache =
-        new LRUQueryCache(
-            2, 10000, new LRUQueryCache.MinSegmentSizePredicate(1, 0f), Float.POSITIVE_INFINITY);
-    searcher.setQueryCache(cache);
-    searcher.count(new DummyQuery());
-    assertEquals(1, cache.getCacheCount());
-
-    cache =
-        new LRUQueryCache(
-            2, 10000, new LRUQueryCache.MinSegmentSizePredicate(0, .6f), Float.POSITIVE_INFINITY);
-    searcher.setQueryCache(cache);
-    searcher.count(new DummyQuery());
-    assertEquals(1, cache.getCacheCount());
-
-    w.addDocument(new Document());
-    reader.close();
-    reader = w.getReader();
-    searcher = newSearcher(reader);
-    searcher.setQueryCachingPolicy(ALWAYS_CACHE);
-    cache =
-        new LRUQueryCache(
-            2, 10000, new LRUQueryCache.MinSegmentSizePredicate(0, .6f), Float.POSITIVE_INFINITY);
-    searcher.setQueryCache(cache);
-    searcher.count(new DummyQuery());
-    assertEquals(0, cache.getCacheCount());
+    IndexWriterConfig iwc = new IndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE);
+    IndexWriter w = new IndexWriter(dir, iwc);
+    IntConsumer newSegment =
+        numDocs -> {
+          try {
+            for (int i = 0; i < numDocs; i++) {
+              w.addDocument(new Document());
+            }
+            w.flush();
+          } catch (IOException e) {
+            throw new UncheckedIOException(e);
+          }
+        };
+    newSegment.accept(1);
+    newSegment.accept(4);
+    newSegment.accept(10);
+    newSegment.accept(35);
+    int numLargeSegments = RandomNumbers.randomIntBetween(random(), 2, 40);
+    for (int i = 0; i < numLargeSegments; i++) {
+      newSegment.accept(RandomNumbers.randomIntBetween(random(), 50, 55));
+    }
+    DirectoryReader reader = DirectoryReader.open(w);
+    for (int i = 0; i < 3; i++) {
+      var predicate =
+          new LRUQueryCache.MinSegmentSizePredicate(
+              RandomNumbers.randomIntBetween(random(), 1, Integer.MAX_VALUE));
+      assertFalse(predicate.test(reader.leaves().get(i)));
+    }
+    for (int i = 3; i < reader.leaves().size(); i++) {
+      var leaf = reader.leaves().get(i);
+      var small =
+          new LRUQueryCache.MinSegmentSizePredicate(
+              RandomNumbers.randomIntBetween(random(), 60, Integer.MAX_VALUE));
+      assertFalse(small.test(leaf));
+      var big =
+          new LRUQueryCache.MinSegmentSizePredicate(
+              RandomNumbers.randomIntBetween(random(), 10, 30));
+      assertTrue(big.test(leaf));
+    }
 
     reader.close();
     w.close();
     dir.close();
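For reference, rough bounds showing why the assertions above are deterministic despite the randomized segment sizes (arithmetic added for this mirror, not part of the commit):

// Leaves of 1, 4, 10 and 35 docs, plus 2..40 segments of 50..55 docs each:
//   total docs  : between 1+4+10+35 + 2*50 = 150 and 1+4+10+35 + 40*55 = 2250
//   leaf count  : between 6 and 44
//   average docs: stays within roughly 25..51 per leaf
// So a 1-, 4- or 10-doc leaf never satisfies maxDoc * 2 > average (assertFalse),
// while a 35-doc or larger leaf always does, since 70 > 51 (assertTrue), leaving
// the minSize floor as the only deciding factor the test varies.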
TestUsageTrackingFilterCachingPolicy.java

@@ -75,11 +75,7 @@ public class TestUsageTrackingFilterCachingPolicy extends LuceneTestCase {
     IndexSearcher searcher = new IndexSearcher(reader);
     UsageTrackingQueryCachingPolicy policy = new UsageTrackingQueryCachingPolicy();
     LRUQueryCache cache =
-        new LRUQueryCache(
-            10,
-            Long.MAX_VALUE,
-            new LRUQueryCache.MinSegmentSizePredicate(1, 0f),
-            Float.POSITIVE_INFINITY);
+        new LRUQueryCache(10, Long.MAX_VALUE, ctx -> true, Float.POSITIVE_INFINITY);
     searcher.setQueryCache(cache);
     searcher.setQueryCachingPolicy(policy);
 