LUCENE-7891: use a non-buggy LRU cache in Lucene's taxonomy facets, by default

This commit is contained in:
Mike McCandless 2017-09-05 10:13:58 -04:00
parent f4b13e86ff
commit b4a1a1a87b
3 changed files with 65 additions and 8 deletions

View File

@ -52,6 +52,9 @@ Bug Fixes
not recommended, lucene-analyzers-icu contains binary data structures
specific to ICU/Unicode versions it is built against. (Chris Koenig, Robert Muir)
* LUCENE-7891: Lucene's taxonomy facets now uses a non-buggy LRU cache
by default. (Jan-Willem van den Broek via Mike McCandless)
Build
* SOLR-11181: Switch order of maven artifact publishing procedure: deploy first

View File

@ -32,8 +32,12 @@ public class LruTaxonomyWriterCache implements TaxonomyWriterCache {
* function, LRU_STRING should be used.
*/
public enum LRUType {
/** Use the label's hash as the key; this can lead to
* silent conflicts! */
/** Use only the label's 64 bit longHashCode as the hash key. Do not
* check equals, unlike most hash maps.
* Note that while these hashes are very likely to be unique, the chance
* of a collision is still greater than zero. If such an unlikely event
* occurs, your document will get an incorrect facet.
*/
LRU_HASHED,
/** Use the label as the hash key; this is always
@ -43,15 +47,15 @@ public class LruTaxonomyWriterCache implements TaxonomyWriterCache {
private NameIntCacheLRU cache;
/** Creates this with {@link LRUType#LRU_HASHED} method. */
/** Creates this with {@link LRUType#LRU_STRING} method. */
public LruTaxonomyWriterCache(int cacheSize) {
// TODO (Facet): choose between NameHashIntCacheLRU and NameIntCacheLRU.
// For guaranteed correctness - not relying on no-collisions in the hash
// function, NameIntCacheLRU should be used:
// On the other hand, NameHashIntCacheLRU takes less RAM but if there
// are collisions (which we never found) two different paths would be
// mapped to the same ordinal...
this(cacheSize, LRUType.LRU_HASHED);
// are collisions two different paths would be mapped to the same
// ordinal...
this(cacheSize, LRUType.LRU_STRING);
}
/** Creates this with the specified method. */
@ -60,8 +64,8 @@ public class LruTaxonomyWriterCache implements TaxonomyWriterCache {
// For guaranteed correctness - not relying on no-collisions in the hash
// function, NameIntCacheLRU should be used:
// On the other hand, NameHashIntCacheLRU takes less RAM but if there
// are collisions (which we never found) two different paths would be
// mapped to the same ordinal...
// are collisions two different paths would be mapped to the same
// ordinal...
if (lruType == LRUType.LRU_HASHED) {
this.cache = new NameHashIntCacheLRU(cacheSize);
} else {

View File

@ -0,0 +1,50 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.facet.taxonomy.writercache;
import org.apache.lucene.facet.FacetTestCase;
import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.junit.Test;
public class TestLruTaxonomyWriterCache extends FacetTestCase {
@Test
public void testDefaultLRUTypeIsCollisionSafe() {
// These labels are clearly different, but have identical longHashCodes.
// Note that these labels are clearly contrived. We did encounter
// collisions in actual production data, but we aren't allowed to publish
// those.
final FacetLabel a = new FacetLabel("\0", "\u0003\uFFE2");
final FacetLabel b = new FacetLabel("\1", "\0");
// If this fails, then the longHashCode implementation has changed. This
// cannot prevent collisions. (All hashes must allow for collisions.) It
// will however stop the rest of this test from making sense. To fix, find
// new colliding labels, or make a subclass of FacetLabel that produces
// collisions.
assertEquals(a.longHashCode(), b.longHashCode());
// Make a cache with capacity > 2 so both our labels will fit. Don't
// specify an LRUType, since we want to check if the default is
// collision-safe.
final LruTaxonomyWriterCache cache = new LruTaxonomyWriterCache(10);
cache.put(a, 0);
cache.put(b, 1);
assertEquals(cache.get(a), 0);
assertEquals(cache.get(b), 1);
}
}