mirror of https://github.com/apache/lucene.git
SOLR-7585: Fix NoSuchElementException in LFUCache
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1681449 13f79535-47bb-0310-9956-ffa450edef68
parent 4bbc5470e7
commit 88b5ea4935
@@ -351,6 +351,9 @@ Bug Fixes
 * SOLR-7335: Fix doc boosts to no longer be multiplied in each field value in multivalued fields that
   are not used in copyFields (Shingo Sasaki via hossman)
 
+* SOLR-7585: Fix NoSuchElementException in LFUCache resulting from heavy writes
+  making concurrent put() calls. (Maciej Zasada via Shawn Heisey)
+
 Optimizations
 ----------------------
 
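The exception named in the new CHANGES entry comes from java.util.TreeSet: first() throws rather than returning null when the set is empty. A minimal standalone sketch (editor illustration, not part of the patch; the class name is made up) of that JDK behavior and of the guard the fix relies on:

import java.util.NoSuchElementException;
import java.util.TreeSet;

public class FirstOnEmptyTreeSet {
  public static void main(String[] args) {
    TreeSet<Integer> tree = new TreeSet<>();

    try {
      tree.first();                     // throws: an empty TreeSet has no first element
    } catch (NoSuchElementException e) {
      System.out.println("first() on an empty TreeSet throws NoSuchElementException");
    }

    if (!tree.isEmpty()) {              // guarded form, analogous to the tree.size() > 0 check added below
      Integer smallest = tree.first();  // safe: only reached when the set is non-empty
      System.out.println(smallest);
    }
  }
}
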
@@ -51,6 +51,7 @@ public class ConcurrentLFUCache<K, V> implements Cache<K,V> {
   private final boolean newThreadForCleanup;
   private volatile boolean islive = true;
   private final Stats stats = new Stats();
+  @SuppressWarnings("unused")
   private final int acceptableWaterMark;
   private long lowHitCount = 0; // not volatile, only accessed in the cleaning method
   private final EvictionListener<K, V> evictionListener;
 
@@ -154,57 +155,64 @@ public class ConcurrentLFUCache<K, V> implements Cache<K,V> {
   }
 
   /**
-   * Removes items from the cache to bring the size down
-   * to an acceptable value ('acceptableWaterMark').
-   * <p/>
-   * It is done in two stages. In the first stage, least recently used items are evicted.
-   * If, after the first stage, the cache size is still greater than 'acceptableSize'
-   * config parameter, the second stage takes over.
-   * <p/>
-   * The second stage is more intensive and tries to bring down the cache size
-   * to the 'lowerWaterMark' config parameter.
+   * Removes items from the cache to bring the size down to the lowerWaterMark.
    */
   private void markAndSweep() {
     if (!markAndSweepLock.tryLock()) return;
     try {
       long lowHitCount = this.lowHitCount;
       isCleaning = true;
       this.lowHitCount = lowHitCount; // volatile write to make isCleaning visible
 
       int sz = stats.size.get();
 
+      if (sz <= upperWaterMark) {
+        /* SOLR-7585: Even though we acquired a lock, multiple threads might detect a need for calling this method.
+         * Locking keeps these from executing at the same time, so they run sequentially. The second and subsequent
+         * sequential runs of this method don't need to be done, since there are no elements to remove.
+         */
+        return;
+      }
+
       int wantToRemove = sz - lowerWaterMark;
 
-      TreeSet<CacheEntry> tree = new TreeSet<>();
+      TreeSet<CacheEntry<K, V>> tree = new TreeSet<>();
 
       for (CacheEntry<K, V> ce : map.values()) {
-        // set hitsCopy to avoid later Atomic reads
+        // set hitsCopy to avoid later Atomic reads. Primitive types are faster than the atomic get().
         ce.hitsCopy = ce.hits.get();
         ce.lastAccessedCopy = ce.lastAccessed;
         if (timeDecay) {
           ce.hits.set(ce.hitsCopy >>> 1);
         }
 
         if (tree.size() < wantToRemove) {
           tree.add(ce);
         } else {
-          // If the hits are not equal, we can remove before adding
-          // which is slightly faster
-          if (ce.hitsCopy < tree.first().hitsCopy) {
-            tree.remove(tree.first());
-            tree.add(ce);
-          } else if (ce.hitsCopy == tree.first().hitsCopy) {
-            tree.add(ce);
-            tree.remove(tree.first());
+          /*
+           * SOLR-7585: Before doing this part, make sure the TreeSet actually has an element, since the first() method
+           * fails with NoSuchElementException if the set is empty. If that test passes, check hits. This test may
+           * never actually fail due to the upperWaterMark check above, but we'll do it anyway.
+           */
+          if (tree.size() > 0) {
+            /* If hits are not equal, we can remove before adding which is slightly faster. I can no longer remember
+             * why removing first is faster, but I vaguely remember being sure about it!
+             */
+            if (ce.hitsCopy < tree.first().hitsCopy) {
+              tree.remove(tree.first());
+              tree.add(ce);
+            } else if (ce.hitsCopy == tree.first().hitsCopy) {
+              tree.add(ce);
+              tree.remove(tree.first());
+            }
           }
         }
       }
 
       for (CacheEntry<K, V> e : tree) {
         evictEntry(e.key);
       }
     } finally {
       isCleaning = false; // set before markAndSweep.unlock() for visibility
       markAndSweepLock.unlock();
     }
   }
 
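The two additions in this hunk close the same race from opposite ends: the early return makes a second, back-to-back sweep a no-op, and the tree.size() > 0 check keeps tree.first() from being called when wantToRemove ends up non-positive and nothing is ever added to the tree, which is the situation the pre-fix code hit under heavy concurrent put() traffic. A toy, self-contained model of that scenario (editor sketch with made-up names, not Solr code; markAndSweep itself is presumably triggered from put() once the size exceeds upperWaterMark):

import java.util.NoSuchElementException;
import java.util.TreeSet;
import java.util.concurrent.locks.ReentrantLock;

public class RedundantSweepSketch {
  static final ReentrantLock lock = new ReentrantLock();

  // 'guarded' toggles the two SOLR-7585 checks on and off.
  static void sweep(int size, int lowerWaterMark, int upperWaterMark, boolean guarded) {
    if (!lock.tryLock()) return;                          // same tryLock pattern as markAndSweep()
    try {
      if (guarded && size <= upperWaterMark) {
        System.out.println("redundant sweep skipped: nothing to evict");
        return;
      }

      int wantToRemove = size - lowerWaterMark;           // non-positive on a redundant run
      TreeSet<Integer> tree = new TreeSet<>();
      for (int hits : new int[] {5, 3, 7}) {
        if (tree.size() < wantToRemove) {
          tree.add(hits);                                 // still collecting eviction candidates
        } else {
          if (guarded && tree.isEmpty()) continue;        // post-fix guard before calling first()
          if (hits < tree.first()) {                      // pre-fix code reached this on an empty tree
            tree.remove(tree.first());
            tree.add(hits);
          }
        }
      }
      System.out.println("candidates: " + tree);
    } finally {
      lock.unlock();
    }
  }

  public static void main(String[] args) {
    sweep(12, 9, 10, true);    // normal run: size above the watermarks, candidates collected
    sweep(9, 9, 10, true);     // redundant run: guarded version exits quietly
    try {
      sweep(9, 9, 10, false);  // unguarded version: empty tree, first() throws as in the pre-fix cache
    } catch (NoSuchElementException expected) {
      System.out.println("pre-fix behavior reproduced: " + expected);
    }
  }
}
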
@@ -230,12 +238,12 @@ public class ConcurrentLFUCache<K, V> implements Cache<K,V> {
     Map<K, V> result = new LinkedHashMap<>();
     if (n <= 0)
       return result;
-    TreeSet<CacheEntry> tree = new TreeSet<>();
+    TreeSet<CacheEntry<K, V>> tree = new TreeSet<>();
     // we need to grab the lock since we are changing the copy variables
     markAndSweepLock.lock();
     try {
       for (Map.Entry<Object, CacheEntry<K, V>> entry : map.entrySet()) {
-        CacheEntry ce = entry.getValue();
+        CacheEntry<K, V> ce = entry.getValue();
         ce.hitsCopy = ce.hits.get();
         ce.lastAccessedCopy = ce.lastAccessed;
         if (tree.size() < n) {
 
@@ -274,7 +282,7 @@ public class ConcurrentLFUCache<K, V> implements Cache<K,V> {
     Map<K, V> result = new LinkedHashMap<>();
     if (n <= 0)
       return result;
-    TreeSet<CacheEntry> tree = new TreeSet<>();
+    TreeSet<CacheEntry<K, V>> tree = new TreeSet<>();
     // we need to grab the lock since we are changing the copy variables
     markAndSweepLock.lock();
     try {
 
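These last two ConcurrentLFUCache hunks only swap the raw TreeSet<CacheEntry> for the parameterized TreeSet<CacheEntry<K, V>>, a generics cleanup with no behavior change. In both methods the TreeSet serves as a bounded, sorted buffer of the best candidates seen so far, where a new candidate only displaces tree.first() once the buffer is full. A minimal integer version of that pattern (editor illustration, not Solr code):

import java.util.TreeSet;

public class BoundedCandidateSet {
  // Keep the 'limit' largest values seen so far; smaller values displace nothing.
  static TreeSet<Integer> keepLargest(int[] values, int limit) {
    TreeSet<Integer> tree = new TreeSet<>();
    for (int v : values) {
      if (tree.size() < limit) {
        tree.add(v);                   // still filling the buffer
      } else if (v > tree.first()) {
        tree.remove(tree.first());     // drop the current smallest to make room
        tree.add(v);
      }
    }
    return tree;
  }

  public static void main(String[] args) {
    System.out.println(keepLargest(new int[] {5, 1, 9, 3, 7, 8}, 3)); // prints [7, 8, 9]
  }
}
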
@@ -18,8 +18,10 @@ package org.apache.solr.search;
  */
 
 import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.common.util.ExecutorUtil;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.util.ConcurrentLFUCache;
+import org.apache.solr.util.DefaultSolrThreadFactory;
 import org.apache.solr.util.RefCounted;
 import org.junit.BeforeClass;
 import org.junit.Test;
 
@@ -28,6 +30,9 @@ import java.io.IOException;
 import java.util.HashMap;
 import java.util.Locale;
 import java.util.Map;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicReference;
 
 
 /**
 
@@ -360,6 +365,41 @@ public class TestLFUCache extends SolrTestCaseJ4 {
     }
   }
 
+  @Test
+  public void testConcurrentAccess() throws InterruptedException {
+    /* Set up a thread pool with twice as many threads as there are CPUs. */
+    final ConcurrentLFUCache<Integer,Long> cache = new ConcurrentLFUCache<>(10, 9);
+    ExecutorService executorService = ExecutorUtil.newMDCAwareFixedThreadPool(10,
+        new DefaultSolrThreadFactory("testConcurrentAccess"));
+    final AtomicReference<Throwable> error = new AtomicReference<>();
+
+    /*
+     * Use the thread pool to execute at least two million puts into the cache.
+     * Without the fix on SOLR-7585, NoSuchElementException is thrown.
+     * Simultaneous calls to markAndSweep are protected from each other by a
+     * lock, so they run sequentially, and due to a problem in the previous
+     * design, the cache eviction doesn't work right.
+     */
+    for (int i = 0; i < atLeast(2_000_000); ++i) {
+      executorService.submit(new Runnable() {
+        @Override
+        public void run() {
+          try {
+            cache.put(random().nextInt(100), random().nextLong());
+          } catch (Throwable t) {
+            error.compareAndSet(null, t);
+          }
+        }
+      });
+    }
+
+    executorService.shutdown();
+    executorService.awaitTermination(1, TimeUnit.MINUTES);
+
+    // then:
+    assertNull("Exception during concurrent access: " + error.get(), error.get());
+  }
+
   // From the original LRU cache tests, they're commented out there too because they take a while.
   //  void doPerfTest(int iter, int cacheSize, int maxKey) {
   //    long start = System.currentTimeMillis();
 
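For readers who want to exercise the same stress loop outside the Solr test framework, a rough standalone equivalent is sketched below (editor illustration). It assumes org.apache.solr.util.ConcurrentLFUCache is on the classpath and substitutes plain JDK pieces (Executors, ThreadLocalRandom, a fixed two million iterations) for the Solr test utilities ExecutorUtil, DefaultSolrThreadFactory, atLeast() and random(); the class name is made up.

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReference;

import org.apache.solr.util.ConcurrentLFUCache;

public class LfuPutStress {
  public static void main(String[] args) throws InterruptedException {
    // Same (10, 9) sizing as the test above, so eviction is triggered constantly.
    final ConcurrentLFUCache<Integer, Long> cache = new ConcurrentLFUCache<>(10, 9);
    final AtomicReference<Throwable> error = new AtomicReference<>();
    ExecutorService pool = Executors.newFixedThreadPool(10);

    for (int i = 0; i < 2_000_000; i++) {
      pool.submit(new Runnable() {
        @Override
        public void run() {
          try {
            cache.put(ThreadLocalRandom.current().nextInt(100), ThreadLocalRandom.current().nextLong());
          } catch (Throwable t) {
            error.compareAndSet(null, t); // remember only the first failure
          }
        }
      });
    }

    pool.shutdown();
    pool.awaitTermination(1, TimeUnit.MINUTES);

    if (error.get() != null) {
      throw new AssertionError("put() failed under concurrency", error.get());
    }
    System.out.println("no exception observed in 2,000,000 concurrent puts");
  }
}
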