From 8b68bf60c9871ecb200f64c64bf55eb6ac456c0e Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Tue, 19 Oct 2021 08:00:00 -0400 Subject: [PATCH] LUCENE-10159: Fix invalid access in sorted set dv (#389) We introduced invalid accesses for sorted set doc values in LUCENE-9613. However, the issue has been unnoticed because the ordinals in doc values tests aren't complex enough to use high packed bits, and the 3 padding bytes make these invalid accesses perfectly fine. To reproduce this issue, we need to use at least 20 bits per value for the ordinals. --- .../lucene90/Lucene90DocValuesProducer.java | 27 +++++++-------- .../index/BaseDocValuesFormatTestCase.java | 33 +++++++++++++++++++ .../java/org/apache/lucene/util/TestUtil.java | 6 +++- 3 files changed, 50 insertions(+), 16 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java index eb58502e103..2561bbb1a41 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java @@ -1374,9 +1374,15 @@ final class Lucene90DocValuesProducer extends DocValuesProducer { int i = 0; int count = 0; + boolean set = false; @Override public long nextOrd() throws IOException { + if (set == false) { + set = true; + i = 0; + count = ords.docValueCount(); + } if (i++ == count) { return NO_MORE_ORDS; } @@ -1385,13 +1391,8 @@ final class Lucene90DocValuesProducer extends DocValuesProducer { @Override public boolean advanceExact(int target) throws IOException { - if (ords.advanceExact(target)) { - count = ords.docValueCount(); - i = 0; - return true; - } else { - return false; - } + set = false; + return ords.advanceExact(target); } @Override @@ -1401,18 +1402,14 @@ final class Lucene90DocValuesProducer extends DocValuesProducer { @Override public int nextDoc() throws IOException { - int doc = ords.nextDoc(); - count = ords.docValueCount(); - i = 0; - return doc; + set = false; + return ords.nextDoc(); } @Override public int advance(int target) throws IOException { - int doc = ords.advance(target); - count = ords.docValueCount(); - i = 0; - return doc; + set = false; + return ords.advance(target); } @Override diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java index 32ba8de6fe8..f9697303e7e 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java @@ -3499,6 +3499,39 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes }); } + /** + * Tests where a DVField uses a high number of packed bits to store its ords. See: + * https://issues.apache.org/jira/browse/LUCENE-10159 + */ + @Nightly + public void testHighOrdsSortedSetDV() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig iwc = new IndexWriterConfig(); + iwc.setRAMBufferSizeMB(8 + random().nextInt(64)); + IndexWriter writer = new IndexWriter(dir, iwc); + // many docs with some of them have very high ords + int numDocs = 20_000 + random().nextInt(10_000); + for (int i = 1; i < numDocs; i++) { + final int numOrds; + if (random().nextInt(100) <= 5) { + numOrds = 1000 + random().nextInt(500); + } else { + numOrds = random().nextInt(10); + } + Document doc = new Document(); + for (int ord = 0; ord < numOrds; ord++) { + doc.add( + new SortedSetDocValuesField("sorted_set_dv", TestUtil.randomBinaryTerm(random(), 2))); + } + writer.addDocument(doc); + } + writer.forceMerge(1, true); + try (DirectoryReader reader = DirectoryReader.open(writer)) { + TestUtil.checkReader(reader); + } + IOUtils.close(writer, dir); + } + private interface FieldCreator { public Field next(); diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java b/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java index c2ff1afd033..fb8a845fd03 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java +++ b/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java @@ -1175,7 +1175,11 @@ public final class TestUtil { /** Returns a random binary term. */ public static BytesRef randomBinaryTerm(Random r) { - int length = r.nextInt(15); + return randomBinaryTerm(r, r.nextInt(15)); + } + + /** Returns a random binary with a given length */ + public static BytesRef randomBinaryTerm(Random r, int length) { BytesRef b = new BytesRef(length); r.nextBytes(b.bytes); b.length = length;