mirror of https://github.com/apache/lucene.git
LUCENE-10159: Fix invalid access in sorted set dv (#389)
We introduced invalid accesses for sorted set doc values in LUCENE-9613. However, the issue went unnoticed because the ordinals in the doc values tests aren't complex enough to require a high number of packed bits, and the 3 padding bytes make these invalid accesses harmless. To reproduce this issue, we need to use at least 20 bits per value for the ordinals.
This commit is contained in:
parent
6c21862a55
commit
8b68bf60c9
|
@ -1374,9 +1374,15 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
|
|||
|
||||
int i = 0;
|
||||
int count = 0;
|
||||
boolean set = false;
|
||||
|
||||
@Override
|
||||
public long nextOrd() throws IOException {
|
||||
if (set == false) {
|
||||
set = true;
|
||||
i = 0;
|
||||
count = ords.docValueCount();
|
||||
}
|
||||
if (i++ == count) {
|
||||
return NO_MORE_ORDS;
|
||||
}
|
||||
|
@ -1385,13 +1391,8 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
|
|||
|
||||
@Override
|
||||
public boolean advanceExact(int target) throws IOException {
|
||||
if (ords.advanceExact(target)) {
|
||||
count = ords.docValueCount();
|
||||
i = 0;
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
set = false;
|
||||
return ords.advanceExact(target);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -1401,18 +1402,14 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
|
|||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
int doc = ords.nextDoc();
|
||||
count = ords.docValueCount();
|
||||
i = 0;
|
||||
return doc;
|
||||
set = false;
|
||||
return ords.nextDoc();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
int doc = ords.advance(target);
|
||||
count = ords.docValueCount();
|
||||
i = 0;
|
||||
return doc;
|
||||
set = false;
|
||||
return ords.advance(target);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -3499,6 +3499,39 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
|
|||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests where a DVField uses a high number of packed bits to store its ords. See:
|
||||
* https://issues.apache.org/jira/browse/LUCENE-10159
|
||||
*/
|
||||
@Nightly
|
||||
public void testHighOrdsSortedSetDV() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig iwc = new IndexWriterConfig();
|
||||
iwc.setRAMBufferSizeMB(8 + random().nextInt(64));
|
||||
IndexWriter writer = new IndexWriter(dir, iwc);
|
||||
// many docs with some of them have very high ords
|
||||
int numDocs = 20_000 + random().nextInt(10_000);
|
||||
for (int i = 1; i < numDocs; i++) {
|
||||
final int numOrds;
|
||||
if (random().nextInt(100) <= 5) {
|
||||
numOrds = 1000 + random().nextInt(500);
|
||||
} else {
|
||||
numOrds = random().nextInt(10);
|
||||
}
|
||||
Document doc = new Document();
|
||||
for (int ord = 0; ord < numOrds; ord++) {
|
||||
doc.add(
|
||||
new SortedSetDocValuesField("sorted_set_dv", TestUtil.randomBinaryTerm(random(), 2)));
|
||||
}
|
||||
writer.addDocument(doc);
|
||||
}
|
||||
writer.forceMerge(1, true);
|
||||
try (DirectoryReader reader = DirectoryReader.open(writer)) {
|
||||
TestUtil.checkReader(reader);
|
||||
}
|
||||
IOUtils.close(writer, dir);
|
||||
}
|
||||
|
||||
private interface FieldCreator {
|
||||
public Field next();
|
||||
|
||||
|
|
|
@ -1175,7 +1175,11 @@ public final class TestUtil {
|
|||
|
||||
/** Returns a random binary term. */
|
||||
public static BytesRef randomBinaryTerm(Random r) {
|
||||
int length = r.nextInt(15);
|
||||
return randomBinaryTerm(r, r.nextInt(15));
|
||||
}
|
||||
|
||||
/** Returns a random binary with a given length */
|
||||
public static BytesRef randomBinaryTerm(Random r, int length) {
|
||||
BytesRef b = new BytesRef(length);
|
||||
r.nextBytes(b.bytes);
|
||||
b.length = length;
|
||||
|
|
Loading…
Reference in New Issue