mirror of https://github.com/apache/lucene.git
LUCENE-8380: UTF8TaxonomyWriterCache page/offset calculation bug
commit 0f652627a0
parent b7b6f242e8
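The bug: the cache stores ordinals in pages of 65536 ints, but computed an entry's slot as id % ORDINALS_PAGE_MASK, i.e. modulo 65535 (the mask) rather than the page size, and also allocated each new page with only ORDINALS_PAGE_MASK entries. Ordinal 65535 therefore wraps to offset 0 and collides with ordinal 0, and every later ordinal lands in the wrong slot, so get() can return the wrong ordinal for a cached label. The fix switches to shift/mask arithmetic, which cannot mix up the two constants. The sketch below is illustration only, not part of the commit; it prints where the old and new arithmetic diverge:

// Illustration only (not from the commit): compare the old, buggy
// page/offset arithmetic with the fixed shift/mask version.
public class PageOffsetBugDemo {
  static final int PAGE_BITS = 16;
  static final int PAGE_SIZE = 1 << PAGE_BITS; // 65536
  static final int PAGE_MASK = PAGE_SIZE - 1;  // 65535

  public static void main(String[] args) {
    for (int id : new int[] {0, 65534, 65535, 65536, 131071}) {
      int oldPage = id / PAGE_SIZE;    // old code: correct
      int oldOffset = id % PAGE_MASK;  // old code: BUG, modulo 65535
      int newPage = id >>> PAGE_BITS;  // fixed
      int newOffset = id & PAGE_MASK;  // fixed, equivalent to id % 65536
      System.out.printf("id=%6d  old=(%d,%5d)  new=(%d,%5d)%n",
                        id, oldPage, oldOffset, newPage, newOffset);
    }
    // id=65535 wraps to old offset 0, colliding with id=0 on page 0.
  }
}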
@@ -129,6 +129,8 @@ API Changes:
 
 Bug Fixes:
 
+* LUCENE-8380: UTF8TaxonomyWriterCache inconsistency. (Ruslan Torobaev, Dawid Weiss)
+
 * LUCENE-8164: IndexWriter silently accepts broken payload. This has been fixed
   via LUCENE-8165 since we are now checking for offset+length going out of bounds.
   (Robert Muir, Nhat Nyugen, Simon Willnauer)
@@ -37,11 +37,13 @@ public final class UTF8TaxonomyWriterCache implements TaxonomyWriterCache, Accountable {
       return new BytesRefBuilder();
     }
   };
 
   private final Counter bytesUsed = Counter.newCounter();
   private final BytesRefHash map = new BytesRefHash(new ByteBlockPool(new DirectTrackingAllocator(bytesUsed)));
 
-  private final static int ORDINALS_PAGE_SIZE = 65536;
-  private final static int ORDINALS_PAGE_MASK = ORDINALS_PAGE_SIZE - 1;
+  private final static int PAGE_BITS = 16;
+  private final static int PAGE_SIZE = 1 << PAGE_BITS;
+  private final static int PAGE_MASK = PAGE_SIZE - 1;
 
   private volatile int[][] ordinals;
@@ -54,7 +56,7 @@ public final class UTF8TaxonomyWriterCache implements TaxonomyWriterCache, Accountable {
   /** Sole constructor. */
   public UTF8TaxonomyWriterCache() {
     ordinals = new int[1][];
-    ordinals[0] = new int[ORDINALS_PAGE_SIZE];
+    ordinals[0] = new int[PAGE_SIZE];
   }
 
   @Override
@@ -67,16 +69,16 @@ public final class UTF8TaxonomyWriterCache implements TaxonomyWriterCache, Accountable {
     if (id == -1) {
       return LabelToOrdinal.INVALID_ORDINAL;
     }
-    int page = id / ORDINALS_PAGE_SIZE;
-    int offset = id % ORDINALS_PAGE_MASK;
+    int page = id >>> PAGE_BITS;
+    int offset = id & PAGE_MASK;
     return ordinals[page][offset];
   }
 
   // Called only from assert
   private boolean assertSameOrdinal(FacetLabel label, int id, int ord) {
     id = -id - 1;
-    int page = id / ORDINALS_PAGE_SIZE;
-    int offset = id % ORDINALS_PAGE_MASK;
+    int page = id >>> PAGE_BITS;
+    int offset = id & PAGE_MASK;
     int oldOrd = ordinals[page][offset];
     if (oldOrd != ord) {
       throw new IllegalArgumentException("label " + label + " was already cached, with old ord=" + oldOrd + " versus new ord=" + ord);
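Both lookup paths now derive page and offset the same way. For non-negative ids and a power-of-two page size, id >>> PAGE_BITS equals id / PAGE_SIZE and id & PAGE_MASK equals id % PAGE_SIZE; the old code's mistake was that the page used the size (65536) while the offset was taken modulo the mask (65535), so the two disagreed. A throwaway check of the identity, mine rather than the commit's:

// Throwaway check (not from the commit): shift/mask equals divide/modulo
// for non-negative ids when the page size is a power of two.
public class ShiftMaskCheck {
  public static void main(String[] args) {
    final int PAGE_BITS = 16;
    final int PAGE_SIZE = 1 << PAGE_BITS;
    final int PAGE_MASK = PAGE_SIZE - 1;
    for (int id = 0; id < (1 << 22); id++) {
      if ((id >>> PAGE_BITS) != id / PAGE_SIZE) throw new AssertionError("page @" + id);
      if ((id & PAGE_MASK) != id % PAGE_SIZE) throw new AssertionError("offset @" + id);
    }
    System.out.println("shift/mask matches divide/modulo");
  }
}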
@@ -95,15 +97,15 @@ public final class UTF8TaxonomyWriterCache implements TaxonomyWriterCache, Accountable {
       return false;
     }
     assert id == count;
-    int page = id / ORDINALS_PAGE_SIZE;
-    int offset = id % ORDINALS_PAGE_MASK;
+    int page = id >>> PAGE_BITS;
+    int offset = id & PAGE_MASK;
     if (page == pageCount) {
       if (page == ordinals.length) {
         int[][] newOrdinals = new int[ArrayUtil.oversize(page+1, RamUsageEstimator.NUM_BYTES_OBJECT_REF)][];
         System.arraycopy(ordinals, 0, newOrdinals, 0, ordinals.length);
         ordinals = newOrdinals;
       }
-      ordinals[page] = new int[ORDINALS_PAGE_MASK];
+      ordinals[page] = new int[PAGE_SIZE];
       pageCount++;
     }
     ordinals[page][offset] = ord;
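The put path carried the matching half of the bug: new pages were allocated with ORDINALS_PAGE_MASK (65535) ints, one int short of a full page. The undersized old offsets never reached index 65535, so the allocation bug was masked; with the corrected offset the allocation must grow to PAGE_SIZE as well. A minimal illustration, not from the commit:

// Illustration only: a page at the old ORDINALS_PAGE_MASK length cannot
// hold the largest corrected offset.
public class PageLengthDemo {
  public static void main(String[] args) {
    final int PAGE_MASK = (1 << 16) - 1;
    int[] page = new int[PAGE_MASK]; // 65535 ints, the old allocation
    int offset = 65535 & PAGE_MASK;  // 65535, largest offset after the fix
    page[offset] = 1;                // throws ArrayIndexOutOfBoundsException
  }
}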
@@ -125,7 +127,7 @@ public final class UTF8TaxonomyWriterCache implements TaxonomyWriterCache, Accountable {
     map.clear();
     map.reinit();
     ordinals = new int[1][];
-    ordinals[0] = new int[ORDINALS_PAGE_SIZE];
+    ordinals[0] = new int[PAGE_SIZE];
     count = 0;
     pageCount = 0;
     assert bytesUsed.get() == 0;
@@ -138,7 +140,7 @@ public final class UTF8TaxonomyWriterCache implements TaxonomyWriterCache, Accountable {
 
   @Override
   public synchronized long ramBytesUsed() {
-    return bytesUsed.get() + pageCount * ORDINALS_PAGE_SIZE * RamUsageEstimator.NUM_BYTES_INT;
+    return bytesUsed.get() + pageCount * PAGE_SIZE * Integer.BYTES;
   }
 
   @Override
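Integer.BYTES is the JDK constant for the width of an int and equals Lucene's RamUsageEstimator.NUM_BYTES_INT (both are 4), so the memory accounting is unchanged: each allocated page still contributes 65536 * 4 = 262,144 bytes.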
@@ -28,6 +28,16 @@ import org.apache.lucene.facet.taxonomy.FacetLabel;
 import org.apache.lucene.util.TestUtil;
 
 public class TestUTF8TaxonomyWriterCache extends FacetTestCase {
+  public void testPageOverflow() throws Exception {
+    UTF8TaxonomyWriterCache cache = new UTF8TaxonomyWriterCache();
+    for (int ord = 0; ord < 65536 * 2; ord++) {
+      cache.put(new FacetLabel("foo:", Integer.toString(ord)), ord);
+    }
+
+    for (int ord = 0; ord < 65536 * 2; ord++) {
+      assertEquals(ord, cache.get(new FacetLabel("foo:", Integer.toString(ord))));
+    }
+  }
 
   public void testRandom() throws Exception {
     LabelToOrdinal map = new LabelToOrdinalMap();
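The new testPageOverflow writes 65536 * 2 = 131,072 ordinals, exactly two full pages, then reads them all back. That forces a second page to be allocated and touches every offset, including 65535, the first id that the old arithmetic mapped to the wrong slot.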
@@ -37,8 +47,6 @@ public class TestUTF8TaxonomyWriterCache extends FacetTestCase {
     final int n = atLeast(10 * 1000);
     final int numUniqueValues = 50 * 1000;
 
-    byte[] buffer = new byte[50];
-
     Random random = random();
     Set<String> uniqueValuesSet = new HashSet<>();
     while (uniqueValuesSet.size() < numUniqueValues) {