LUCENE-8380: UTF8TaxonomyWriterCache page/ offset calculation bug

This commit is contained in:
Dawid Weiss 2018-07-04 09:06:33 +02:00
parent b7b6f242e8
commit 0f652627a0
3 changed files with 28 additions and 16 deletions

View File

@ -129,6 +129,8 @@ API Changes:
Bug Fixes: Bug Fixes:
* LUCENE-8380: UTF8TaxonomyWriterCache inconsistency. (Ruslan Torobaev, Dawid Weiss)
* LUCENE-8164: IndexWriter silently accepts broken payload. This has been fixed * LUCENE-8164: IndexWriter silently accepts broken payload. This has been fixed
via LUCENE-8165 since we are now checking for offset+length going out of bounds. via LUCENE-8165 since we are now checking for offset+length going out of bounds.
(Robert Muir, Nhat Nyugen, Simon Willnauer) (Robert Muir, Nhat Nyugen, Simon Willnauer)

View File

@ -37,11 +37,13 @@ public final class UTF8TaxonomyWriterCache implements TaxonomyWriterCache, Accou
return new BytesRefBuilder(); return new BytesRefBuilder();
} }
}; };
private final Counter bytesUsed = Counter.newCounter(); private final Counter bytesUsed = Counter.newCounter();
private final BytesRefHash map = new BytesRefHash(new ByteBlockPool(new DirectTrackingAllocator(bytesUsed))); private final BytesRefHash map = new BytesRefHash(new ByteBlockPool(new DirectTrackingAllocator(bytesUsed)));
private final static int ORDINALS_PAGE_SIZE = 65536; private final static int PAGE_BITS = 16;
private final static int ORDINALS_PAGE_MASK = ORDINALS_PAGE_SIZE - 1; private final static int PAGE_SIZE = 1 << PAGE_BITS;
private final static int PAGE_MASK = PAGE_SIZE - 1;
private volatile int[][] ordinals; private volatile int[][] ordinals;
@ -54,7 +56,7 @@ public final class UTF8TaxonomyWriterCache implements TaxonomyWriterCache, Accou
/** Sole constructor. */ /** Sole constructor. */
public UTF8TaxonomyWriterCache() { public UTF8TaxonomyWriterCache() {
ordinals = new int[1][]; ordinals = new int[1][];
ordinals[0] = new int[ORDINALS_PAGE_SIZE]; ordinals[0] = new int[PAGE_SIZE];
} }
@Override @Override
@ -67,16 +69,16 @@ public final class UTF8TaxonomyWriterCache implements TaxonomyWriterCache, Accou
if (id == -1) { if (id == -1) {
return LabelToOrdinal.INVALID_ORDINAL; return LabelToOrdinal.INVALID_ORDINAL;
} }
int page = id / ORDINALS_PAGE_SIZE; int page = id >>> PAGE_BITS;
int offset = id % ORDINALS_PAGE_MASK; int offset = id & PAGE_MASK;
return ordinals[page][offset]; return ordinals[page][offset];
} }
// Called only from assert // Called only from assert
private boolean assertSameOrdinal(FacetLabel label, int id, int ord) { private boolean assertSameOrdinal(FacetLabel label, int id, int ord) {
id = -id - 1; id = -id - 1;
int page = id / ORDINALS_PAGE_SIZE; int page = id >>> PAGE_BITS;
int offset = id % ORDINALS_PAGE_MASK; int offset = id & PAGE_MASK;
int oldOrd = ordinals[page][offset]; int oldOrd = ordinals[page][offset];
if (oldOrd != ord) { if (oldOrd != ord) {
throw new IllegalArgumentException("label " + label + " was already cached, with old ord=" + oldOrd + " versus new ord=" + ord); throw new IllegalArgumentException("label " + label + " was already cached, with old ord=" + oldOrd + " versus new ord=" + ord);
@ -95,15 +97,15 @@ public final class UTF8TaxonomyWriterCache implements TaxonomyWriterCache, Accou
return false; return false;
} }
assert id == count; assert id == count;
int page = id / ORDINALS_PAGE_SIZE; int page = id >>> PAGE_BITS;
int offset = id % ORDINALS_PAGE_MASK; int offset = id & PAGE_MASK;
if (page == pageCount) { if (page == pageCount) {
if (page == ordinals.length) { if (page == ordinals.length) {
int[][] newOrdinals = new int[ArrayUtil.oversize(page+1, RamUsageEstimator.NUM_BYTES_OBJECT_REF)][]; int[][] newOrdinals = new int[ArrayUtil.oversize(page+1, RamUsageEstimator.NUM_BYTES_OBJECT_REF)][];
System.arraycopy(ordinals, 0, newOrdinals, 0, ordinals.length); System.arraycopy(ordinals, 0, newOrdinals, 0, ordinals.length);
ordinals = newOrdinals; ordinals = newOrdinals;
} }
ordinals[page] = new int[ORDINALS_PAGE_MASK]; ordinals[page] = new int[PAGE_SIZE];
pageCount++; pageCount++;
} }
ordinals[page][offset] = ord; ordinals[page][offset] = ord;
@ -125,7 +127,7 @@ public final class UTF8TaxonomyWriterCache implements TaxonomyWriterCache, Accou
map.clear(); map.clear();
map.reinit(); map.reinit();
ordinals = new int[1][]; ordinals = new int[1][];
ordinals[0] = new int[ORDINALS_PAGE_SIZE]; ordinals[0] = new int[PAGE_SIZE];
count = 0; count = 0;
pageCount = 0; pageCount = 0;
assert bytesUsed.get() == 0; assert bytesUsed.get() == 0;
@ -138,7 +140,7 @@ public final class UTF8TaxonomyWriterCache implements TaxonomyWriterCache, Accou
@Override @Override
public synchronized long ramBytesUsed() { public synchronized long ramBytesUsed() {
return bytesUsed.get() + pageCount * ORDINALS_PAGE_SIZE * RamUsageEstimator.NUM_BYTES_INT; return bytesUsed.get() + pageCount * PAGE_SIZE * Integer.BYTES;
} }
@Override @Override

View File

@ -28,6 +28,16 @@ import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.util.TestUtil; import org.apache.lucene.util.TestUtil;
public class TestUTF8TaxonomyWriterCache extends FacetTestCase { public class TestUTF8TaxonomyWriterCache extends FacetTestCase {
public void testPageOverflow() throws Exception {
UTF8TaxonomyWriterCache cache = new UTF8TaxonomyWriterCache();
for (int ord = 0; ord < 65536 * 2; ord++) {
cache.put(new FacetLabel("foo:", Integer.toString(ord)), ord);
}
for (int ord = 0; ord < 65536 * 2; ord++) {
assertEquals(ord, cache.get(new FacetLabel("foo:", Integer.toString(ord))));
}
}
public void testRandom() throws Exception { public void testRandom() throws Exception {
LabelToOrdinal map = new LabelToOrdinalMap(); LabelToOrdinal map = new LabelToOrdinalMap();
@ -37,8 +47,6 @@ public class TestUTF8TaxonomyWriterCache extends FacetTestCase {
final int n = atLeast(10 * 1000); final int n = atLeast(10 * 1000);
final int numUniqueValues = 50 * 1000; final int numUniqueValues = 50 * 1000;
byte[] buffer = new byte[50];
Random random = random(); Random random = random();
Set<String> uniqueValuesSet = new HashSet<>(); Set<String> uniqueValuesSet = new HashSet<>();
while (uniqueValuesSet.size() < numUniqueValues) { while (uniqueValuesSet.size() < numUniqueValues) {