mirror of https://github.com/apache/lucene.git
LUCENE-6118: Improve the efficiency of the history structure for recently-used filters.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1646674 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
8079368b8b
commit
5a9639c3e6
|
@ -36,6 +36,9 @@ import org.apache.lucene.util.FrequencyTrackingRingBuffer;
|
||||||
*/
|
*/
|
||||||
public final class UsageTrackingFilterCachingPolicy implements FilterCachingPolicy {
|
public final class UsageTrackingFilterCachingPolicy implements FilterCachingPolicy {
|
||||||
|
|
||||||
|
// the hash code that we use as a sentinel in the ring buffer.
|
||||||
|
private static final int SENTINEL = Integer.MIN_VALUE;
|
||||||
|
|
||||||
static boolean isCostly(Filter filter) {
|
static boolean isCostly(Filter filter) {
|
||||||
// This does not measure the cost of iterating over the filter (for this we
|
// This does not measure the cost of iterating over the filter (for this we
|
||||||
// already have the DocIdSetIterator#cost API) but the cost to build the
|
// already have the DocIdSetIterator#cost API) but the cost to build the
|
||||||
|
@ -51,7 +54,7 @@ public final class UsageTrackingFilterCachingPolicy implements FilterCachingPoli
|
||||||
}
|
}
|
||||||
|
|
||||||
private final FilterCachingPolicy.CacheOnLargeSegments segmentPolicy;
|
private final FilterCachingPolicy.CacheOnLargeSegments segmentPolicy;
|
||||||
private final FrequencyTrackingRingBuffer<Integer> recentlyUsedFilters;
|
private final FrequencyTrackingRingBuffer recentlyUsedFilters;
|
||||||
private final int minFrequencyCostlyFilters;
|
private final int minFrequencyCostlyFilters;
|
||||||
private final int minFrequencyCheapFilters;
|
private final int minFrequencyCheapFilters;
|
||||||
private final int minFrequencyOtherFilters;
|
private final int minFrequencyOtherFilters;
|
||||||
|
@ -96,7 +99,7 @@ public final class UsageTrackingFilterCachingPolicy implements FilterCachingPoli
|
||||||
if (minFrequencyCheapFilters > historySize || minFrequencyCostlyFilters > historySize || minFrequencyOtherFilters > historySize) {
|
if (minFrequencyCheapFilters > historySize || minFrequencyCostlyFilters > historySize || minFrequencyOtherFilters > historySize) {
|
||||||
throw new IllegalArgumentException("The minimum frequencies should be less than the size of the history of filters that are being tracked");
|
throw new IllegalArgumentException("The minimum frequencies should be less than the size of the history of filters that are being tracked");
|
||||||
}
|
}
|
||||||
this.recentlyUsedFilters = new FrequencyTrackingRingBuffer<>(historySize);
|
this.recentlyUsedFilters = new FrequencyTrackingRingBuffer(historySize, SENTINEL);
|
||||||
this.minFrequencyCostlyFilters = minFrequencyCostlyFilters;
|
this.minFrequencyCostlyFilters = minFrequencyCostlyFilters;
|
||||||
this.minFrequencyCheapFilters = minFrequencyCheapFilters;
|
this.minFrequencyCheapFilters = minFrequencyCheapFilters;
|
||||||
this.minFrequencyOtherFilters = minFrequencyOtherFilters;
|
this.minFrequencyOtherFilters = minFrequencyOtherFilters;
|
||||||
|
@ -104,17 +107,21 @@ public final class UsageTrackingFilterCachingPolicy implements FilterCachingPoli
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void onUse(Filter filter) {
|
public void onUse(Filter filter) {
|
||||||
// Using the filter hash codes might help keep memory usage a bit lower
|
// we only track hash codes, which
|
||||||
// since some filters might have non-negligible memory usage?
|
synchronized (this) {
|
||||||
recentlyUsedFilters.add(filter.hashCode());
|
recentlyUsedFilters.add(filter.hashCode());
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean shouldCache(Filter filter, LeafReaderContext context, DocIdSet set) throws IOException {
|
public boolean shouldCache(Filter filter, LeafReaderContext context, DocIdSet set) throws IOException {
|
||||||
if (segmentPolicy.shouldCache(filter, context, set) == false) {
|
if (segmentPolicy.shouldCache(filter, context, set) == false) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
final int frequency = recentlyUsedFilters.frequency(filter.hashCode());
|
final int frequency;
|
||||||
|
synchronized (this) {
|
||||||
|
frequency = recentlyUsedFilters.frequency(filter.hashCode());
|
||||||
|
}
|
||||||
if (frequency >= minFrequencyOtherFilters) {
|
if (frequency >= minFrequencyOtherFilters) {
|
||||||
return true;
|
return true;
|
||||||
} else if (isCostly(filter) && frequency >= minFrequencyCostlyFilters) {
|
} else if (isCostly(filter) && frequency >= minFrequencyCostlyFilters) {
|
||||||
|
|
|
@ -17,75 +17,202 @@ package org.apache.lucene.util;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.util.ArrayDeque;
|
import java.util.Arrays;
|
||||||
import java.util.Collections;
|
import java.util.HashMap;
|
||||||
import java.util.Deque;
|
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.concurrent.ConcurrentHashMap;
|
|
||||||
import java.util.concurrent.ConcurrentMap;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A ring buffer that tracks the frequency of the items that it contains.
|
* A ring buffer that tracks the frequency of the integers that it contains.
|
||||||
* This is typically useful to track popular recently-used items.
|
* This is typically useful to track the hash codes of popular recently-used
|
||||||
|
* items.
|
||||||
*
|
*
|
||||||
* This class is thread-safe.
|
* This data-structure requires 22 bytes per entry on average (between 16 and
|
||||||
|
* 28).
|
||||||
*
|
*
|
||||||
* @lucene.internal
|
* @lucene.internal
|
||||||
*/
|
*/
|
||||||
public final class FrequencyTrackingRingBuffer<T> {
|
public final class FrequencyTrackingRingBuffer implements Accountable {
|
||||||
|
|
||||||
|
private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(FrequencyTrackingRingBuffer.class);
|
||||||
|
|
||||||
private final int maxSize;
|
private final int maxSize;
|
||||||
private final Deque<T> ringBuffer;
|
private final int[] buffer;
|
||||||
private final ConcurrentMap<T, Integer> frequencies;
|
private int position;
|
||||||
|
private final IntBag frequencies;
|
||||||
|
|
||||||
/** Create a new ring buffer that will contain at most <code>size</code> items. */
|
/** Create a new ring buffer that will contain at most <code>maxSize</code> items.
|
||||||
public FrequencyTrackingRingBuffer(int maxSize) {
|
* This buffer will initially contain <code>maxSize</code> times the
|
||||||
|
* <code>sentinel</code> value. */
|
||||||
|
public FrequencyTrackingRingBuffer(int maxSize, int sentinel) {
|
||||||
|
if (maxSize < 2) {
|
||||||
|
throw new IllegalArgumentException("maxSize must be at least 2");
|
||||||
|
}
|
||||||
this.maxSize = maxSize;
|
this.maxSize = maxSize;
|
||||||
this.ringBuffer = new ArrayDeque<>(maxSize);
|
buffer = new int[maxSize];
|
||||||
this.frequencies = new ConcurrentHashMap<>();
|
position = 0;
|
||||||
|
frequencies = new IntBag(maxSize);
|
||||||
|
|
||||||
|
Arrays.fill(buffer, sentinel);
|
||||||
|
for (int i = 0; i < maxSize; ++i) {
|
||||||
|
frequencies.add(sentinel);
|
||||||
|
}
|
||||||
|
assert frequencies.frequency(sentinel) == maxSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long ramBytesUsed() {
|
||||||
|
return BASE_RAM_BYTES_USED
|
||||||
|
+ frequencies.ramBytesUsed()
|
||||||
|
+ RamUsageEstimator.sizeOf(buffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Add a new item to this ring buffer, potentially removing the oldest
|
* Add a new item to this ring buffer, potentially removing the oldest
|
||||||
* entry from this buffer if it is already full.
|
* entry from this buffer if it is already full.
|
||||||
*/
|
*/
|
||||||
public synchronized void add(T item) {
|
public void add(int i) {
|
||||||
// we need this method to be protected by a lock since it is important for
|
// remove the previous value
|
||||||
// correctness that the ring buffer and the frequencies table have
|
final int removed = buffer[position];
|
||||||
// consistent content
|
final boolean removedFromBag = frequencies.remove(removed);
|
||||||
if (item == null) {
|
assert removedFromBag;
|
||||||
throw new IllegalArgumentException("null items are not supported");
|
// add the new value
|
||||||
|
buffer[position] = i;
|
||||||
|
frequencies.add(i);
|
||||||
|
// increment the position
|
||||||
|
position += 1;
|
||||||
|
if (position == maxSize) {
|
||||||
|
position = 0;
|
||||||
}
|
}
|
||||||
assert ringBuffer.size() <= maxSize;
|
|
||||||
if (ringBuffer.size() == maxSize) {
|
|
||||||
// evict the oldest entry
|
|
||||||
final T removed = ringBuffer.removeFirst();
|
|
||||||
final int newFrequency = frequency(removed) - 1;
|
|
||||||
if (newFrequency == 0) {
|
|
||||||
// free for GC
|
|
||||||
frequencies.remove(removed);
|
|
||||||
} else {
|
|
||||||
frequencies.put(removed, newFrequency);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// add the new entry and update frequencies
|
|
||||||
ringBuffer.addLast(item);
|
|
||||||
frequencies.put(item, frequency(item) + 1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the frequency of the provided item in the ring buffer.
|
* Returns the frequency of the provided key in the ring buffer.
|
||||||
*/
|
*/
|
||||||
public int frequency(T item) {
|
public int frequency(int key) {
|
||||||
// The use of a concurrent hash map allows us to not use a lock for this read-only method
|
return frequencies.frequency(key);
|
||||||
final Integer freq = frequencies.get(item);
|
|
||||||
return freq == null ? 0 : freq;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// pkg-private for testing
|
// pkg-private for testing
|
||||||
Map<T, Integer> asFrequencyMap() {
|
Map<Integer, Integer> asFrequencyMap() {
|
||||||
return Collections.unmodifiableMap(frequencies);
|
return frequencies.asMap();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A bag of integers.
|
||||||
|
* Since in the context of the ring buffer the maximum size is known up-front
|
||||||
|
* there is no need to worry about resizing the underlying storage.
|
||||||
|
*/
|
||||||
|
private static class IntBag implements Accountable {
|
||||||
|
|
||||||
|
private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(IntBag.class);
|
||||||
|
|
||||||
|
private final int[] keys;
|
||||||
|
private final int[] freqs;
|
||||||
|
private final int mask;
|
||||||
|
|
||||||
|
IntBag(int maxSize) {
|
||||||
|
// load factor of 2/3
|
||||||
|
int capacity = Math.max(2, maxSize * 3 / 2);
|
||||||
|
// round up to the next power of two
|
||||||
|
capacity = Integer.highestOneBit(capacity - 1) << 1;
|
||||||
|
assert capacity > maxSize;
|
||||||
|
keys = new int[capacity];
|
||||||
|
freqs = new int[capacity];
|
||||||
|
mask = capacity - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long ramBytesUsed() {
|
||||||
|
return BASE_RAM_BYTES_USED
|
||||||
|
+ RamUsageEstimator.sizeOf(keys)
|
||||||
|
+ RamUsageEstimator.sizeOf(freqs);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Return the frequency of the give key in the bag. */
|
||||||
|
int frequency(int key) {
|
||||||
|
for (int slot = key & mask; ; slot = (slot + 1) & mask) {
|
||||||
|
if (keys[slot] == key) {
|
||||||
|
return freqs[slot];
|
||||||
|
} else if (freqs[slot] == 0) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Increment the frequency of the given key by 1 and return its new frequency. */
|
||||||
|
int add(int key) {
|
||||||
|
for (int slot = key & mask; ; slot = (slot + 1) & mask) {
|
||||||
|
if (freqs[slot] == 0) {
|
||||||
|
keys[slot] = key;
|
||||||
|
return freqs[slot] = 1;
|
||||||
|
} else if (keys[slot] == key) {
|
||||||
|
return ++freqs[slot];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Decrement the frequency of the given key by one, or do nothing if the
|
||||||
|
* key is not present in the bag. Returns true iff the key was contained
|
||||||
|
* in the bag. */
|
||||||
|
boolean remove(int key) {
|
||||||
|
for (int slot = key & mask; ; slot = (slot + 1) & mask) {
|
||||||
|
if (freqs[slot] == 0) {
|
||||||
|
// no such key in the bag
|
||||||
|
return false;
|
||||||
|
} else if (keys[slot] == key) {
|
||||||
|
final int newFreq = --freqs[slot];
|
||||||
|
if (newFreq == 0) { // removed
|
||||||
|
relocateAdjacentKeys(slot);
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void relocateAdjacentKeys(int freeSlot) {
|
||||||
|
for (int slot = (freeSlot + 1) & mask; ; slot = (slot + 1) & mask) {
|
||||||
|
final int freq = freqs[slot];
|
||||||
|
if (freq == 0) {
|
||||||
|
// end of the collision chain, we're done
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
final int key = keys[slot];
|
||||||
|
// the slot where <code>key</code> should be if there were no collisions
|
||||||
|
final int expectedSlot = key & mask;
|
||||||
|
// if the free slot is between the expected slot and the slot where the
|
||||||
|
// key is, then we can relocate there
|
||||||
|
if (between(expectedSlot, slot, freeSlot)) {
|
||||||
|
keys[freeSlot] = key;
|
||||||
|
freqs[freeSlot] = freq;
|
||||||
|
// slot is the new free slot
|
||||||
|
freqs[slot] = 0;
|
||||||
|
freeSlot = slot;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Given a chain of occupied slots between <code>chainStart</code>
|
||||||
|
* and <code>chainEnd</code>, return whether <code>slot</code> is
|
||||||
|
* between the start and end of the chain. */
|
||||||
|
private static boolean between(int chainStart, int chainEnd, int slot) {
|
||||||
|
if (chainStart <= chainEnd) {
|
||||||
|
return chainStart <= slot && slot <= chainEnd;
|
||||||
|
} else {
|
||||||
|
// the chain is across the end of the array
|
||||||
|
return slot >= chainStart || slot <= chainEnd;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Map<Integer, Integer> asMap() {
|
||||||
|
Map<Integer, Integer> map = new HashMap<>();
|
||||||
|
for (int i = 0; i < keys.length; ++i) {
|
||||||
|
if (freqs[i] > 0) {
|
||||||
|
map.put(keys[i], freqs[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return map;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -24,15 +24,19 @@ import java.util.Map;
|
||||||
|
|
||||||
public class TestFrequencyTrackingRingBuffer extends LuceneTestCase {
|
public class TestFrequencyTrackingRingBuffer extends LuceneTestCase {
|
||||||
|
|
||||||
private static <T> void assertBuffer(FrequencyTrackingRingBuffer<T> buffer, int maxSize, List<T> items) {
|
private static void assertBuffer(FrequencyTrackingRingBuffer buffer, int maxSize, int sentinel, List<Integer> items) {
|
||||||
final List<T> recentItems;
|
final List<Integer> recentItems;
|
||||||
if (items.size() <= maxSize) {
|
if (items.size() <= maxSize) {
|
||||||
recentItems = items;
|
recentItems = new ArrayList<>();
|
||||||
|
for (int i = items.size(); i < maxSize; ++i) {
|
||||||
|
recentItems.add(sentinel);
|
||||||
|
}
|
||||||
|
recentItems.addAll(items);
|
||||||
} else {
|
} else {
|
||||||
recentItems = items.subList(items.size() - maxSize, items.size());
|
recentItems = items.subList(items.size() - maxSize, items.size());
|
||||||
}
|
}
|
||||||
final Map<T, Integer> expectedFrequencies = new HashMap<T, Integer>();
|
final Map<Integer, Integer> expectedFrequencies = new HashMap<Integer, Integer>();
|
||||||
for (T item : recentItems) {
|
for (Integer item : recentItems) {
|
||||||
final Integer freq = expectedFrequencies.get(item);
|
final Integer freq = expectedFrequencies.get(item);
|
||||||
if (freq == null) {
|
if (freq == null) {
|
||||||
expectedFrequencies.put(item, 1);
|
expectedFrequencies.put(item, 1);
|
||||||
|
@ -46,18 +50,29 @@ public class TestFrequencyTrackingRingBuffer extends LuceneTestCase {
|
||||||
public void test() {
|
public void test() {
|
||||||
final int iterations = atLeast(100);
|
final int iterations = atLeast(100);
|
||||||
for (int i = 0; i < iterations; ++i) {
|
for (int i = 0; i < iterations; ++i) {
|
||||||
final int maxSize = 1 + random().nextInt(100);
|
final int maxSize = 2 + random().nextInt(100);
|
||||||
final int numitems = random().nextInt(500);
|
final int numitems = random().nextInt(5000);
|
||||||
final int maxitem = 1 + random().nextInt(100);
|
final int maxitem = 1 + random().nextInt(100);
|
||||||
List<Integer> items = new ArrayList<>();
|
List<Integer> items = new ArrayList<>();
|
||||||
FrequencyTrackingRingBuffer<Integer> buffer = new FrequencyTrackingRingBuffer<>(maxSize);
|
final int sentinel = random().nextInt(200);
|
||||||
|
FrequencyTrackingRingBuffer buffer = new FrequencyTrackingRingBuffer(maxSize, sentinel);
|
||||||
for (int j = 0; j < numitems; ++j) {
|
for (int j = 0; j < numitems; ++j) {
|
||||||
final Integer item = random().nextInt(maxitem);
|
final Integer item = random().nextInt(maxitem);
|
||||||
items.add(item);
|
items.add(item);
|
||||||
buffer.add(item);
|
buffer.add(item);
|
||||||
}
|
}
|
||||||
assertBuffer(buffer, maxSize, items);
|
assertBuffer(buffer, maxSize, sentinel, items);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testRamBytesUsed() {
|
||||||
|
final int maxSize = 2 + random().nextInt(10000);
|
||||||
|
final int sentinel = random().nextInt();
|
||||||
|
FrequencyTrackingRingBuffer buffer = new FrequencyTrackingRingBuffer(maxSize, sentinel);
|
||||||
|
for (int i = 0; i < 10000; ++i) {
|
||||||
|
buffer.add(random().nextInt());
|
||||||
|
}
|
||||||
|
assertEquals(RamUsageTester.sizeOf(buffer), buffer.ramBytesUsed());
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue