mirror of https://github.com/apache/lucene.git
LUCENE-7891: use a non-buggy LRU cache in Lucene's taxonomy facets, by default
This commit is contained in:
parent
f4b13e86ff
commit
b4a1a1a87b
|
@ -52,6 +52,9 @@ Bug Fixes
|
||||||
not recommended, lucene-analyzers-icu contains binary data structures
|
not recommended, lucene-analyzers-icu contains binary data structures
|
||||||
specific to ICU/Unicode versions it is built against. (Chris Koenig, Robert Muir)
|
specific to ICU/Unicode versions it is built against. (Chris Koenig, Robert Muir)
|
||||||
|
|
||||||
|
* LUCENE-7891: Lucene's taxonomy facets now uses a non-buggy LRU cache
|
||||||
|
by default. (Jan-Willem van den Broek via Mike McCandless)
|
||||||
|
|
||||||
Build
|
Build
|
||||||
|
|
||||||
* SOLR-11181: Switch order of maven artifact publishing procedure: deploy first
|
* SOLR-11181: Switch order of maven artifact publishing procedure: deploy first
|
||||||
|
|
|
@ -32,8 +32,12 @@ public class LruTaxonomyWriterCache implements TaxonomyWriterCache {
|
||||||
* function, LRU_STRING should be used.
|
* function, LRU_STRING should be used.
|
||||||
*/
|
*/
|
||||||
public enum LRUType {
|
public enum LRUType {
|
||||||
/** Use the label's hash as the key; this can lead to
|
/** Use only the label's 64 bit longHashCode as the hash key. Do not
|
||||||
* silent conflicts! */
|
* check equals, unlike most hash maps.
|
||||||
|
* Note that while these hashes are very likely to be unique, the chance
|
||||||
|
* of a collision is still greater than zero. If such an unlikely event
|
||||||
|
* occurs, your document will get an incorrect facet.
|
||||||
|
*/
|
||||||
LRU_HASHED,
|
LRU_HASHED,
|
||||||
|
|
||||||
/** Use the label as the hash key; this is always
|
/** Use the label as the hash key; this is always
|
||||||
|
@ -43,15 +47,15 @@ public class LruTaxonomyWriterCache implements TaxonomyWriterCache {
|
||||||
|
|
||||||
private NameIntCacheLRU cache;
|
private NameIntCacheLRU cache;
|
||||||
|
|
||||||
/** Creates this with {@link LRUType#LRU_HASHED} method. */
|
/** Creates this with {@link LRUType#LRU_STRING} method. */
|
||||||
public LruTaxonomyWriterCache(int cacheSize) {
|
public LruTaxonomyWriterCache(int cacheSize) {
|
||||||
// TODO (Facet): choose between NameHashIntCacheLRU and NameIntCacheLRU.
|
// TODO (Facet): choose between NameHashIntCacheLRU and NameIntCacheLRU.
|
||||||
// For guaranteed correctness - not relying on no-collisions in the hash
|
// For guaranteed correctness - not relying on no-collisions in the hash
|
||||||
// function, NameIntCacheLRU should be used:
|
// function, NameIntCacheLRU should be used:
|
||||||
// On the other hand, NameHashIntCacheLRU takes less RAM but if there
|
// On the other hand, NameHashIntCacheLRU takes less RAM but if there
|
||||||
// are collisions (which we never found) two different paths would be
|
// are collisions two different paths would be mapped to the same
|
||||||
// mapped to the same ordinal...
|
// ordinal...
|
||||||
this(cacheSize, LRUType.LRU_HASHED);
|
this(cacheSize, LRUType.LRU_STRING);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Creates this with the specified method. */
|
/** Creates this with the specified method. */
|
||||||
|
@ -60,8 +64,8 @@ public class LruTaxonomyWriterCache implements TaxonomyWriterCache {
|
||||||
// For guaranteed correctness - not relying on no-collisions in the hash
|
// For guaranteed correctness - not relying on no-collisions in the hash
|
||||||
// function, NameIntCacheLRU should be used:
|
// function, NameIntCacheLRU should be used:
|
||||||
// On the other hand, NameHashIntCacheLRU takes less RAM but if there
|
// On the other hand, NameHashIntCacheLRU takes less RAM but if there
|
||||||
// are collisions (which we never found) two different paths would be
|
// are collisions two different paths would be mapped to the same
|
||||||
// mapped to the same ordinal...
|
// ordinal...
|
||||||
if (lruType == LRUType.LRU_HASHED) {
|
if (lruType == LRUType.LRU_HASHED) {
|
||||||
this.cache = new NameHashIntCacheLRU(cacheSize);
|
this.cache = new NameHashIntCacheLRU(cacheSize);
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -0,0 +1,50 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.lucene.facet.taxonomy.writercache;
|
||||||
|
|
||||||
|
import org.apache.lucene.facet.FacetTestCase;
|
||||||
|
import org.apache.lucene.facet.taxonomy.FacetLabel;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
public class TestLruTaxonomyWriterCache extends FacetTestCase {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testDefaultLRUTypeIsCollisionSafe() {
|
||||||
|
// These labels are clearly different, but have identical longHashCodes.
|
||||||
|
// Note that these labels are clearly contrived. We did encounter
|
||||||
|
// collisions in actual production data, but we aren't allowed to publish
|
||||||
|
// those.
|
||||||
|
final FacetLabel a = new FacetLabel("\0", "\u0003\uFFE2");
|
||||||
|
final FacetLabel b = new FacetLabel("\1", "\0");
|
||||||
|
// If this fails, then the longHashCode implementation has changed. This
|
||||||
|
// cannot prevent collisions. (All hashes must allow for collisions.) It
|
||||||
|
// will however stop the rest of this test from making sense. To fix, find
|
||||||
|
// new colliding labels, or make a subclass of FacetLabel that produces
|
||||||
|
// collisions.
|
||||||
|
assertEquals(a.longHashCode(), b.longHashCode());
|
||||||
|
// Make a cache with capacity > 2 so both our labels will fit. Don't
|
||||||
|
// specify an LRUType, since we want to check if the default is
|
||||||
|
// collision-safe.
|
||||||
|
final LruTaxonomyWriterCache cache = new LruTaxonomyWriterCache(10);
|
||||||
|
cache.put(a, 0);
|
||||||
|
cache.put(b, 1);
|
||||||
|
assertEquals(cache.get(a), 0);
|
||||||
|
assertEquals(cache.get(b), 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
Loading…
Reference in New Issue