LUCENE-10159: Fix invalid access in sorted set dv (#389)

We introduced invalid accesses for sorted set doc values in LUCENE-9613. 
However, the issue has gone unnoticed because the ordinals in the doc values
tests aren't complex enough to need a high number of packed bits, and the 3
padding bytes make these invalid accesses harmless in those tests. To reproduce this
issue, we need to use at least 20 bits per value for the ordinals.
This commit is contained in:
Nhat Nguyen 2021-10-19 08:00:00 -04:00 committed by GitHub
parent 6c21862a55
commit 8b68bf60c9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 50 additions and 16 deletions

View File

@ -1374,9 +1374,15 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
int i = 0; int i = 0;
int count = 0; int count = 0;
boolean set = false;
@Override @Override
public long nextOrd() throws IOException { public long nextOrd() throws IOException {
if (set == false) {
set = true;
i = 0;
count = ords.docValueCount();
}
if (i++ == count) { if (i++ == count) {
return NO_MORE_ORDS; return NO_MORE_ORDS;
} }
@ -1385,13 +1391,8 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
@Override @Override
public boolean advanceExact(int target) throws IOException { public boolean advanceExact(int target) throws IOException {
if (ords.advanceExact(target)) { set = false;
count = ords.docValueCount(); return ords.advanceExact(target);
i = 0;
return true;
} else {
return false;
}
} }
@Override @Override
@ -1401,18 +1402,14 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
@Override @Override
public int nextDoc() throws IOException { public int nextDoc() throws IOException {
int doc = ords.nextDoc(); set = false;
count = ords.docValueCount(); return ords.nextDoc();
i = 0;
return doc;
} }
@Override @Override
public int advance(int target) throws IOException { public int advance(int target) throws IOException {
int doc = ords.advance(target); set = false;
count = ords.docValueCount(); return ords.advance(target);
i = 0;
return doc;
} }
@Override @Override

View File

@ -3499,6 +3499,39 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
}); });
} }
/**
 * Tests a sorted-set doc-values field that uses a high number of packed bits to store its ords.
 * See: https://issues.apache.org/jira/browse/LUCENE-10159
 *
 * <p>Indexes between 19,999 and 29,998 documents into a single field. About 6% of the documents
 * receive 1000-1499 random two-byte terms; the rest receive 0-9. The index is then force-merged
 * to one segment and verified with {@code TestUtil.checkReader}.
 *
 * @throws Exception if indexing, merging, or the reader check fails
 */
@Nightly
public void testHighOrdsSortedSetDV() throws Exception {
  // try-with-resources closes the writer first and then the directory, even when indexing or
  // the reader check throws, so a failure here does not also leak the directory.
  try (Directory dir = newDirectory()) {
    IndexWriterConfig iwc = new IndexWriterConfig();
    iwc.setRAMBufferSizeMB(8 + random().nextInt(64));
    try (IndexWriter writer = new IndexWriter(dir, iwc)) {
      // many docs, a small fraction of which carry a very large number of values
      int numDocs = 20_000 + random().nextInt(10_000);
      // the loop starts at 1, so numDocs - 1 documents are added
      for (int i = 1; i < numDocs; i++) {
        final int numOrds;
        if (random().nextInt(100) <= 5) {
          // heavy document: 1000..1499 values
          numOrds = 1000 + random().nextInt(500);
        } else {
          // light document: 0..9 values (possibly none at all)
          numOrds = random().nextInt(10);
        }
        Document doc = new Document();
        for (int ord = 0; ord < numOrds; ord++) {
          doc.add(
              new SortedSetDocValuesField("sorted_set_dv", TestUtil.randomBinaryTerm(random(), 2)));
        }
        writer.addDocument(doc);
      }
      // merge down to a single segment so every value lands in one doc-values instance
      writer.forceMerge(1, true);
      try (DirectoryReader reader = DirectoryReader.open(writer)) {
        TestUtil.checkReader(reader);
      }
    }
  }
}
private interface FieldCreator { private interface FieldCreator {
public Field next(); public Field next();

View File

@ -1175,7 +1175,11 @@ public final class TestUtil {
/** Returns a random binary term. */ /** Returns a random binary term. */
public static BytesRef randomBinaryTerm(Random r) { public static BytesRef randomBinaryTerm(Random r) {
int length = r.nextInt(15); return randomBinaryTerm(r, r.nextInt(15));
}
/** Returns a random binary term with the given length. */
public static BytesRef randomBinaryTerm(Random r, int length) {
BytesRef b = new BytesRef(length); BytesRef b = new BytesRef(length);
r.nextBytes(b.bytes); r.nextBytes(b.bytes);
b.length = length; b.length = length;