mirror of https://github.com/apache/lucene.git
LUCENE-7891: use a non-buggy LRU cache in Lucene's taxonomy facets, by default
This commit is contained in:
parent
f4b13e86ff
commit
b4a1a1a87b
|
@ -52,6 +52,9 @@ Bug Fixes
|
|||
not recommended, lucene-analyzers-icu contains binary data structures
|
||||
specific to ICU/Unicode versions it is built against. (Chris Koenig, Robert Muir)
|
||||
|
||||
* LUCENE-7891: Lucene's taxonomy facets now uses a non-buggy LRU cache
|
||||
by default. (Jan-Willem van den Broek via Mike McCandless)
|
||||
|
||||
Build
|
||||
|
||||
* SOLR-11181: Switch order of maven artifact publishing procedure: deploy first
|
||||
|
|
|
@ -32,8 +32,12 @@ public class LruTaxonomyWriterCache implements TaxonomyWriterCache {
|
|||
* function, LRU_STRING should be used.
|
||||
*/
|
||||
public enum LRUType {
|
||||
/** Use the label's hash as the key; this can lead to
|
||||
* silent conflicts! */
|
||||
/** Use only the label's 64 bit longHashCode as the hash key. Do not
|
||||
* check equals, unlike most hash maps.
|
||||
* Note that while these hashes are very likely to be unique, the chance
|
||||
* of a collision is still greater than zero. If such an unlikely event
|
||||
* occurs, your document will get an incorrect facet.
|
||||
*/
|
||||
LRU_HASHED,
|
||||
|
||||
/** Use the label as the hash key; this is always
|
||||
|
@ -43,15 +47,15 @@ public class LruTaxonomyWriterCache implements TaxonomyWriterCache {
|
|||
|
||||
private NameIntCacheLRU cache;
|
||||
|
||||
/** Creates this with {@link LRUType#LRU_HASHED} method. */
|
||||
/** Creates this with {@link LRUType#LRU_STRING} method. */
|
||||
public LruTaxonomyWriterCache(int cacheSize) {
|
||||
// TODO (Facet): choose between NameHashIntCacheLRU and NameIntCacheLRU.
|
||||
// For guaranteed correctness - not relying on no-collisions in the hash
|
||||
// function, NameIntCacheLRU should be used:
|
||||
// On the other hand, NameHashIntCacheLRU takes less RAM but if there
|
||||
// are collisions (which we never found) two different paths would be
|
||||
// mapped to the same ordinal...
|
||||
this(cacheSize, LRUType.LRU_HASHED);
|
||||
// are collisions two different paths would be mapped to the same
|
||||
// ordinal...
|
||||
this(cacheSize, LRUType.LRU_STRING);
|
||||
}
|
||||
|
||||
/** Creates this with the specified method. */
|
||||
|
@ -60,8 +64,8 @@ public class LruTaxonomyWriterCache implements TaxonomyWriterCache {
|
|||
// For guaranteed correctness - not relying on no-collisions in the hash
|
||||
// function, NameIntCacheLRU should be used:
|
||||
// On the other hand, NameHashIntCacheLRU takes less RAM but if there
|
||||
// are collisions (which we never found) two different paths would be
|
||||
// mapped to the same ordinal...
|
||||
// are collisions two different paths would be mapped to the same
|
||||
// ordinal...
|
||||
if (lruType == LRUType.LRU_HASHED) {
|
||||
this.cache = new NameHashIntCacheLRU(cacheSize);
|
||||
} else {
|
||||
|
|
|
@ -0,0 +1,50 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.facet.taxonomy.writercache;
|
||||
|
||||
import org.apache.lucene.facet.FacetTestCase;
|
||||
import org.apache.lucene.facet.taxonomy.FacetLabel;
|
||||
import org.junit.Test;
|
||||
|
||||
public class TestLruTaxonomyWriterCache extends FacetTestCase {
|
||||
|
||||
@Test
|
||||
public void testDefaultLRUTypeIsCollisionSafe() {
|
||||
// These labels are clearly different, but have identical longHashCodes.
|
||||
// Note that these labels are clearly contrived. We did encounter
|
||||
// collisions in actual production data, but we aren't allowed to publish
|
||||
// those.
|
||||
final FacetLabel a = new FacetLabel("\0", "\u0003\uFFE2");
|
||||
final FacetLabel b = new FacetLabel("\1", "\0");
|
||||
// If this fails, then the longHashCode implementation has changed. This
|
||||
// cannot prevent collisions. (All hashes must allow for collisions.) It
|
||||
// will however stop the rest of this test from making sense. To fix, find
|
||||
// new colliding labels, or make a subclass of FacetLabel that produces
|
||||
// collisions.
|
||||
assertEquals(a.longHashCode(), b.longHashCode());
|
||||
// Make a cache with capacity > 2 so both our labels will fit. Don't
|
||||
// specify an LRUType, since we want to check if the default is
|
||||
// collision-safe.
|
||||
final LruTaxonomyWriterCache cache = new LruTaxonomyWriterCache(10);
|
||||
cache.put(a, 0);
|
||||
cache.put(b, 1);
|
||||
assertEquals(cache.get(a), 0);
|
||||
assertEquals(cache.get(b), 1);
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in New Issue