mirror of https://github.com/apache/lucene.git
LUCENE-6118: Improve the efficiency of the history structure for recently-used filters.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1646674 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
8079368b8b
commit
5a9639c3e6
|
@ -36,6 +36,9 @@ import org.apache.lucene.util.FrequencyTrackingRingBuffer;
|
|||
*/
|
||||
public final class UsageTrackingFilterCachingPolicy implements FilterCachingPolicy {
|
||||
|
||||
// the hash code that we use as a sentinel in the ring buffer.
|
||||
private static final int SENTINEL = Integer.MIN_VALUE;
|
||||
|
||||
static boolean isCostly(Filter filter) {
|
||||
// This does not measure the cost of iterating over the filter (for this we
|
||||
// already have the DocIdSetIterator#cost API) but the cost to build the
|
||||
|
@ -51,7 +54,7 @@ public final class UsageTrackingFilterCachingPolicy implements FilterCachingPoli
|
|||
}
|
||||
|
||||
private final FilterCachingPolicy.CacheOnLargeSegments segmentPolicy;
|
||||
private final FrequencyTrackingRingBuffer<Integer> recentlyUsedFilters;
|
||||
private final FrequencyTrackingRingBuffer recentlyUsedFilters;
|
||||
private final int minFrequencyCostlyFilters;
|
||||
private final int minFrequencyCheapFilters;
|
||||
private final int minFrequencyOtherFilters;
|
||||
|
@ -96,7 +99,7 @@ public final class UsageTrackingFilterCachingPolicy implements FilterCachingPoli
|
|||
if (minFrequencyCheapFilters > historySize || minFrequencyCostlyFilters > historySize || minFrequencyOtherFilters > historySize) {
|
||||
throw new IllegalArgumentException("The minimum frequencies should be less than the size of the history of filters that are being tracked");
|
||||
}
|
||||
this.recentlyUsedFilters = new FrequencyTrackingRingBuffer<>(historySize);
|
||||
this.recentlyUsedFilters = new FrequencyTrackingRingBuffer(historySize, SENTINEL);
|
||||
this.minFrequencyCostlyFilters = minFrequencyCostlyFilters;
|
||||
this.minFrequencyCheapFilters = minFrequencyCheapFilters;
|
||||
this.minFrequencyOtherFilters = minFrequencyOtherFilters;
|
||||
|
@ -104,9 +107,10 @@ public final class UsageTrackingFilterCachingPolicy implements FilterCachingPoli
|
|||
|
||||
@Override
|
||||
public void onUse(Filter filter) {
|
||||
// Using the filter hash codes might help keep memory usage a bit lower
|
||||
// since some filters might have non-negligible memory usage?
|
||||
recentlyUsedFilters.add(filter.hashCode());
|
||||
// we only track hash codes, which
|
||||
synchronized (this) {
|
||||
recentlyUsedFilters.add(filter.hashCode());
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -114,7 +118,10 @@ public final class UsageTrackingFilterCachingPolicy implements FilterCachingPoli
|
|||
if (segmentPolicy.shouldCache(filter, context, set) == false) {
|
||||
return false;
|
||||
}
|
||||
final int frequency = recentlyUsedFilters.frequency(filter.hashCode());
|
||||
final int frequency;
|
||||
synchronized (this) {
|
||||
frequency = recentlyUsedFilters.frequency(filter.hashCode());
|
||||
}
|
||||
if (frequency >= minFrequencyOtherFilters) {
|
||||
return true;
|
||||
} else if (isCostly(filter) && frequency >= minFrequencyCostlyFilters) {
|
||||
|
|
|
@ -17,75 +17,202 @@ package org.apache.lucene.util;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.ArrayDeque;
|
||||
import java.util.Collections;
|
||||
import java.util.Deque;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.ConcurrentMap;
|
||||
|
||||
/**
|
||||
* A ring buffer that tracks the frequency of the items that it contains.
|
||||
* This is typically useful to track popular recently-used items.
|
||||
* A ring buffer that tracks the frequency of the integers that it contains.
|
||||
* This is typically useful to track the hash codes of popular recently-used
|
||||
* items.
|
||||
*
|
||||
* This class is thread-safe.
|
||||
* This data-structure requires 22 bytes per entry on average (between 16 and
|
||||
* 28).
|
||||
*
|
||||
* @lucene.internal
|
||||
*/
|
||||
public final class FrequencyTrackingRingBuffer<T> {
|
||||
public final class FrequencyTrackingRingBuffer implements Accountable {
|
||||
|
||||
private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(FrequencyTrackingRingBuffer.class);
|
||||
|
||||
private final int maxSize;
|
||||
private final Deque<T> ringBuffer;
|
||||
private final ConcurrentMap<T, Integer> frequencies;
|
||||
private final int[] buffer;
|
||||
private int position;
|
||||
private final IntBag frequencies;
|
||||
|
||||
/** Create a new ring buffer that will contain at most <code>size</code> items. */
|
||||
public FrequencyTrackingRingBuffer(int maxSize) {
|
||||
/** Create a new ring buffer that will contain at most <code>maxSize</code> items.
|
||||
* This buffer will initially contain <code>maxSize</code> times the
|
||||
* <code>sentinel</code> value. */
|
||||
public FrequencyTrackingRingBuffer(int maxSize, int sentinel) {
|
||||
if (maxSize < 2) {
|
||||
throw new IllegalArgumentException("maxSize must be at least 2");
|
||||
}
|
||||
this.maxSize = maxSize;
|
||||
this.ringBuffer = new ArrayDeque<>(maxSize);
|
||||
this.frequencies = new ConcurrentHashMap<>();
|
||||
buffer = new int[maxSize];
|
||||
position = 0;
|
||||
frequencies = new IntBag(maxSize);
|
||||
|
||||
Arrays.fill(buffer, sentinel);
|
||||
for (int i = 0; i < maxSize; ++i) {
|
||||
frequencies.add(sentinel);
|
||||
}
|
||||
assert frequencies.frequency(sentinel) == maxSize;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long ramBytesUsed() {
|
||||
return BASE_RAM_BYTES_USED
|
||||
+ frequencies.ramBytesUsed()
|
||||
+ RamUsageEstimator.sizeOf(buffer);
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a new item to this ring buffer, potentially removing the oldest
|
||||
* entry from this buffer if it is already full.
|
||||
*/
|
||||
public synchronized void add(T item) {
|
||||
// we need this method to be protected by a lock since it is important for
|
||||
// correctness that the ring buffer and the frequencies table have
|
||||
// consistent content
|
||||
if (item == null) {
|
||||
throw new IllegalArgumentException("null items are not supported");
|
||||
public void add(int i) {
|
||||
// remove the previous value
|
||||
final int removed = buffer[position];
|
||||
final boolean removedFromBag = frequencies.remove(removed);
|
||||
assert removedFromBag;
|
||||
// add the new value
|
||||
buffer[position] = i;
|
||||
frequencies.add(i);
|
||||
// increment the position
|
||||
position += 1;
|
||||
if (position == maxSize) {
|
||||
position = 0;
|
||||
}
|
||||
assert ringBuffer.size() <= maxSize;
|
||||
if (ringBuffer.size() == maxSize) {
|
||||
// evict the oldest entry
|
||||
final T removed = ringBuffer.removeFirst();
|
||||
final int newFrequency = frequency(removed) - 1;
|
||||
if (newFrequency == 0) {
|
||||
// free for GC
|
||||
frequencies.remove(removed);
|
||||
} else {
|
||||
frequencies.put(removed, newFrequency);
|
||||
}
|
||||
}
|
||||
|
||||
// add the new entry and update frequencies
|
||||
ringBuffer.addLast(item);
|
||||
frequencies.put(item, frequency(item) + 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the frequency of the provided item in the ring buffer.
|
||||
* Returns the frequency of the provided key in the ring buffer.
|
||||
*/
|
||||
public int frequency(T item) {
|
||||
// The use of a concurrent hash map allows us to not use a lock for this read-only method
|
||||
final Integer freq = frequencies.get(item);
|
||||
return freq == null ? 0 : freq;
|
||||
public int frequency(int key) {
|
||||
return frequencies.frequency(key);
|
||||
}
|
||||
|
||||
// pkg-private for testing
|
||||
Map<T, Integer> asFrequencyMap() {
|
||||
return Collections.unmodifiableMap(frequencies);
|
||||
Map<Integer, Integer> asFrequencyMap() {
|
||||
return frequencies.asMap();
|
||||
}
|
||||
|
||||
/**
|
||||
* A bag of integers.
|
||||
* Since in the context of the ring buffer the maximum size is known up-front
|
||||
* there is no need to worry about resizing the underlying storage.
|
||||
*/
|
||||
private static class IntBag implements Accountable {
|
||||
|
||||
private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(IntBag.class);
|
||||
|
||||
private final int[] keys;
|
||||
private final int[] freqs;
|
||||
private final int mask;
|
||||
|
||||
IntBag(int maxSize) {
|
||||
// load factor of 2/3
|
||||
int capacity = Math.max(2, maxSize * 3 / 2);
|
||||
// round up to the next power of two
|
||||
capacity = Integer.highestOneBit(capacity - 1) << 1;
|
||||
assert capacity > maxSize;
|
||||
keys = new int[capacity];
|
||||
freqs = new int[capacity];
|
||||
mask = capacity - 1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long ramBytesUsed() {
|
||||
return BASE_RAM_BYTES_USED
|
||||
+ RamUsageEstimator.sizeOf(keys)
|
||||
+ RamUsageEstimator.sizeOf(freqs);
|
||||
}
|
||||
|
||||
/** Return the frequency of the give key in the bag. */
|
||||
int frequency(int key) {
|
||||
for (int slot = key & mask; ; slot = (slot + 1) & mask) {
|
||||
if (keys[slot] == key) {
|
||||
return freqs[slot];
|
||||
} else if (freqs[slot] == 0) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Increment the frequency of the given key by 1 and return its new frequency. */
|
||||
int add(int key) {
|
||||
for (int slot = key & mask; ; slot = (slot + 1) & mask) {
|
||||
if (freqs[slot] == 0) {
|
||||
keys[slot] = key;
|
||||
return freqs[slot] = 1;
|
||||
} else if (keys[slot] == key) {
|
||||
return ++freqs[slot];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Decrement the frequency of the given key by one, or do nothing if the
|
||||
* key is not present in the bag. Returns true iff the key was contained
|
||||
* in the bag. */
|
||||
boolean remove(int key) {
|
||||
for (int slot = key & mask; ; slot = (slot + 1) & mask) {
|
||||
if (freqs[slot] == 0) {
|
||||
// no such key in the bag
|
||||
return false;
|
||||
} else if (keys[slot] == key) {
|
||||
final int newFreq = --freqs[slot];
|
||||
if (newFreq == 0) { // removed
|
||||
relocateAdjacentKeys(slot);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void relocateAdjacentKeys(int freeSlot) {
|
||||
for (int slot = (freeSlot + 1) & mask; ; slot = (slot + 1) & mask) {
|
||||
final int freq = freqs[slot];
|
||||
if (freq == 0) {
|
||||
// end of the collision chain, we're done
|
||||
break;
|
||||
}
|
||||
final int key = keys[slot];
|
||||
// the slot where <code>key</code> should be if there were no collisions
|
||||
final int expectedSlot = key & mask;
|
||||
// if the free slot is between the expected slot and the slot where the
|
||||
// key is, then we can relocate there
|
||||
if (between(expectedSlot, slot, freeSlot)) {
|
||||
keys[freeSlot] = key;
|
||||
freqs[freeSlot] = freq;
|
||||
// slot is the new free slot
|
||||
freqs[slot] = 0;
|
||||
freeSlot = slot;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Given a chain of occupied slots between <code>chainStart</code>
|
||||
* and <code>chainEnd</code>, return whether <code>slot</code> is
|
||||
* between the start and end of the chain. */
|
||||
private static boolean between(int chainStart, int chainEnd, int slot) {
|
||||
if (chainStart <= chainEnd) {
|
||||
return chainStart <= slot && slot <= chainEnd;
|
||||
} else {
|
||||
// the chain is across the end of the array
|
||||
return slot >= chainStart || slot <= chainEnd;
|
||||
}
|
||||
}
|
||||
|
||||
Map<Integer, Integer> asMap() {
|
||||
Map<Integer, Integer> map = new HashMap<>();
|
||||
for (int i = 0; i < keys.length; ++i) {
|
||||
if (freqs[i] > 0) {
|
||||
map.put(keys[i], freqs[i]);
|
||||
}
|
||||
}
|
||||
return map;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -24,15 +24,19 @@ import java.util.Map;
|
|||
|
||||
public class TestFrequencyTrackingRingBuffer extends LuceneTestCase {
|
||||
|
||||
private static <T> void assertBuffer(FrequencyTrackingRingBuffer<T> buffer, int maxSize, List<T> items) {
|
||||
final List<T> recentItems;
|
||||
private static void assertBuffer(FrequencyTrackingRingBuffer buffer, int maxSize, int sentinel, List<Integer> items) {
|
||||
final List<Integer> recentItems;
|
||||
if (items.size() <= maxSize) {
|
||||
recentItems = items;
|
||||
recentItems = new ArrayList<>();
|
||||
for (int i = items.size(); i < maxSize; ++i) {
|
||||
recentItems.add(sentinel);
|
||||
}
|
||||
recentItems.addAll(items);
|
||||
} else {
|
||||
recentItems = items.subList(items.size() - maxSize, items.size());
|
||||
}
|
||||
final Map<T, Integer> expectedFrequencies = new HashMap<T, Integer>();
|
||||
for (T item : recentItems) {
|
||||
final Map<Integer, Integer> expectedFrequencies = new HashMap<Integer, Integer>();
|
||||
for (Integer item : recentItems) {
|
||||
final Integer freq = expectedFrequencies.get(item);
|
||||
if (freq == null) {
|
||||
expectedFrequencies.put(item, 1);
|
||||
|
@ -46,18 +50,29 @@ public class TestFrequencyTrackingRingBuffer extends LuceneTestCase {
|
|||
public void test() {
|
||||
final int iterations = atLeast(100);
|
||||
for (int i = 0; i < iterations; ++i) {
|
||||
final int maxSize = 1 + random().nextInt(100);
|
||||
final int numitems = random().nextInt(500);
|
||||
final int maxSize = 2 + random().nextInt(100);
|
||||
final int numitems = random().nextInt(5000);
|
||||
final int maxitem = 1 + random().nextInt(100);
|
||||
List<Integer> items = new ArrayList<>();
|
||||
FrequencyTrackingRingBuffer<Integer> buffer = new FrequencyTrackingRingBuffer<>(maxSize);
|
||||
final int sentinel = random().nextInt(200);
|
||||
FrequencyTrackingRingBuffer buffer = new FrequencyTrackingRingBuffer(maxSize, sentinel);
|
||||
for (int j = 0; j < numitems; ++j) {
|
||||
final Integer item = random().nextInt(maxitem);
|
||||
items.add(item);
|
||||
buffer.add(item);
|
||||
}
|
||||
assertBuffer(buffer, maxSize, items);
|
||||
assertBuffer(buffer, maxSize, sentinel, items);
|
||||
}
|
||||
}
|
||||
|
||||
public void testRamBytesUsed() {
|
||||
final int maxSize = 2 + random().nextInt(10000);
|
||||
final int sentinel = random().nextInt();
|
||||
FrequencyTrackingRingBuffer buffer = new FrequencyTrackingRingBuffer(maxSize, sentinel);
|
||||
for (int i = 0; i < 10000; ++i) {
|
||||
buffer.add(random().nextInt());
|
||||
}
|
||||
assertEquals(RamUsageTester.sizeOf(buffer), buffer.ramBytesUsed());
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue