LUCENE-8380: UTF8TaxonomyWriterCache page/offset calculation bug

This commit is contained in:
Dawid Weiss 2018-07-04 09:06:33 +02:00
parent b7b6f242e8
commit 0f652627a0
3 changed files with 28 additions and 16 deletions

View File

@ -129,6 +129,8 @@ API Changes:
Bug Fixes:
* LUCENE-8380: UTF8TaxonomyWriterCache inconsistency. (Ruslan Torobaev, Dawid Weiss)
* LUCENE-8164: IndexWriter silently accepts broken payload. This has been fixed
via LUCENE-8165 since we are now checking for offset+length going out of bounds.
(Robert Muir, Nhat Nguyen, Simon Willnauer)

View File

@ -37,12 +37,14 @@ public final class UTF8TaxonomyWriterCache implements TaxonomyWriterCache, Accou
return new BytesRefBuilder();
}
};
private final Counter bytesUsed = Counter.newCounter();
private final BytesRefHash map = new BytesRefHash(new ByteBlockPool(new DirectTrackingAllocator(bytesUsed)));
private final static int ORDINALS_PAGE_SIZE = 65536;
private final static int ORDINALS_PAGE_MASK = ORDINALS_PAGE_SIZE - 1;
private final static int PAGE_BITS = 16;
private final static int PAGE_SIZE = 1 << PAGE_BITS;
private final static int PAGE_MASK = PAGE_SIZE - 1;
private volatile int[][] ordinals;
// How many labels we are storing:
@ -54,7 +56,7 @@ public final class UTF8TaxonomyWriterCache implements TaxonomyWriterCache, Accou
/** Sole constructor. */
public UTF8TaxonomyWriterCache() {
ordinals = new int[1][];
ordinals[0] = new int[ORDINALS_PAGE_SIZE];
ordinals[0] = new int[PAGE_SIZE];
}
@Override
@ -67,16 +69,16 @@ public final class UTF8TaxonomyWriterCache implements TaxonomyWriterCache, Accou
if (id == -1) {
return LabelToOrdinal.INVALID_ORDINAL;
}
int page = id / ORDINALS_PAGE_SIZE;
int offset = id % ORDINALS_PAGE_MASK;
int page = id >>> PAGE_BITS;
int offset = id & PAGE_MASK;
return ordinals[page][offset];
}
// Called only from assert
private boolean assertSameOrdinal(FacetLabel label, int id, int ord) {
id = -id - 1;
int page = id / ORDINALS_PAGE_SIZE;
int offset = id % ORDINALS_PAGE_MASK;
int page = id >>> PAGE_BITS;
int offset = id & PAGE_MASK;
int oldOrd = ordinals[page][offset];
if (oldOrd != ord) {
throw new IllegalArgumentException("label " + label + " was already cached, with old ord=" + oldOrd + " versus new ord=" + ord);
@ -95,15 +97,15 @@ public final class UTF8TaxonomyWriterCache implements TaxonomyWriterCache, Accou
return false;
}
assert id == count;
int page = id / ORDINALS_PAGE_SIZE;
int offset = id % ORDINALS_PAGE_MASK;
int page = id >>> PAGE_BITS;
int offset = id & PAGE_MASK;
if (page == pageCount) {
if (page == ordinals.length) {
int[][] newOrdinals = new int[ArrayUtil.oversize(page+1, RamUsageEstimator.NUM_BYTES_OBJECT_REF)][];
System.arraycopy(ordinals, 0, newOrdinals, 0, ordinals.length);
ordinals = newOrdinals;
}
ordinals[page] = new int[ORDINALS_PAGE_MASK];
ordinals[page] = new int[PAGE_SIZE];
pageCount++;
}
ordinals[page][offset] = ord;
@ -125,7 +127,7 @@ public final class UTF8TaxonomyWriterCache implements TaxonomyWriterCache, Accou
map.clear();
map.reinit();
ordinals = new int[1][];
ordinals[0] = new int[ORDINALS_PAGE_SIZE];
ordinals[0] = new int[PAGE_SIZE];
count = 0;
pageCount = 0;
assert bytesUsed.get() == 0;
@ -138,7 +140,7 @@ public final class UTF8TaxonomyWriterCache implements TaxonomyWriterCache, Accou
@Override
public synchronized long ramBytesUsed() {
return bytesUsed.get() + pageCount * ORDINALS_PAGE_SIZE * RamUsageEstimator.NUM_BYTES_INT;
return bytesUsed.get() + pageCount * PAGE_SIZE * Integer.BYTES;
}
@Override
@ -146,7 +148,7 @@ public final class UTF8TaxonomyWriterCache implements TaxonomyWriterCache, Accou
}
private static final byte DELIM_CHAR = (byte) 0x1F;
private BytesRef toBytes(FacetLabel label) {
BytesRefBuilder bytes = this.bytes.get();
bytes.clear();

View File

@ -28,6 +28,16 @@ import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.util.TestUtil;
public class TestUTF8TaxonomyWriterCache extends FacetTestCase {
// Regression test for LUCENE-8380: the old code computed the in-page
// offset as `id % ORDINALS_PAGE_MASK` (i.e. modulo 65535) instead of
// `id & PAGE_MASK`, so ordinals were mapped to the wrong slot once the
// cache grew past a single 65536-entry page.
public void testPageOverflow() throws Exception {
UTF8TaxonomyWriterCache cache = new UTF8TaxonomyWriterCache();
// Insert two full pages' worth of ordinals (2 * 65536) so a second
// page must be allocated and page-boundary arithmetic is exercised.
for (int ord = 0; ord < 65536 * 2; ord++) {
cache.put(new FacetLabel("foo:", Integer.toString(ord)), ord);
}
// Every cached label must round-trip back to its original ordinal.
for (int ord = 0; ord < 65536 * 2; ord++) {
assertEquals(ord, cache.get(new FacetLabel("foo:", Integer.toString(ord))));
}
}
public void testRandom() throws Exception {
LabelToOrdinal map = new LabelToOrdinalMap();
@ -37,8 +47,6 @@ public class TestUTF8TaxonomyWriterCache extends FacetTestCase {
final int n = atLeast(10 * 1000);
final int numUniqueValues = 50 * 1000;
byte[] buffer = new byte[50];
Random random = random();
Set<String> uniqueValuesSet = new HashSet<>();
while (uniqueValuesSet.size() < numUniqueValues) {