From a383253fe1b0bf64f09034d435d5317670154bfa Mon Sep 17 00:00:00 2001 From: Lu Xugang Date: Thu, 2 Jun 2022 01:33:47 +0800 Subject: [PATCH] LUCENE-10598: SortedSetDocValues#docValueCount() should be always greater than zero (#934) * LUCENE-10598: SortedSetDocValues#docValueCount() should be always greater than zero --- lucene/CHANGES.txt | 2 ++ .../codecs/lucene90/Lucene90DocValuesProducer.java | 2 +- .../java/org/apache/lucene/index/CheckIndex.java | 13 +++++++++++++ .../org/apache/lucene/index/SortedSetDocValues.java | 5 +++-- .../lucene90/TestLucene90DocValuesFormat.java | 2 ++ .../org/apache/lucene/index/TestMultiDocValues.java | 1 + .../apache/lucene/index/TestSortingCodecReader.java | 1 + .../apache/lucene/index/memory/TestMemoryIndex.java | 1 + 8 files changed, 24 insertions(+), 3 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 4b3f15bb724..1021400df74 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -90,6 +90,8 @@ Bug Fixes * LUCENE-10582: Fix merging of overridden CollectionStatistics in CombinedFieldQuery (Yannick Welsch) +* LUCENE-10598: SortedSetDocValues#docValueCount() should be always greater than zero. (Lu Xugang) + Other --------------------- diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java index cb5a27aa033..2bb71aa32aa 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java @@ -1454,7 +1454,7 @@ final class Lucene90DocValuesProducer extends DocValuesProducer { @Override public long docValueCount() { - return count; + return ords.docValueCount(); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java index e6bb6413112..2b47c6fdcbb 100644 --- a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java +++ b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java @@ -3337,9 +3337,22 @@ public final class CheckIndex implements Closeable { LongBitSet seenOrds = new LongBitSet(dv.getValueCount()); long maxOrd2 = -1; for (int docID = dv.nextDoc(); docID != NO_MORE_DOCS; docID = dv.nextDoc()) { + long count = dv.docValueCount(); + if (count == 0) { + throw new CheckIndexException( + "sortedset dv for field: " + + fieldName + + " returned docValueCount=0 for docID=" + + docID); + } if (dv2.advanceExact(docID) == false) { throw new CheckIndexException("advanceExact did not find matching doc ID: " + docID); } + long count2 = dv2.docValueCount(); + if (count != count2) { + throw new CheckIndexException( + "advanceExact reports different value count: " + count + " != " + count2); + } long lastOrd = -1; long ord; int ordCount = 0; diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValues.java b/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValues.java index a44b8b0b560..4ebd938551e 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValues.java +++ b/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValues.java @@ -45,8 +45,9 @@ public abstract class SortedSetDocValues extends DocValuesIterator { public abstract long nextOrd() throws IOException; /** - * Retrieves the number of values for the current document. This must always be greater than zero. - * It is illegal to call this method after {@link #advanceExact(int)} returned {@code false}. + * Retrieves the number of unique ords for the current document. This must always be greater than + * zero. It is illegal to call this method after {@link #advanceExact(int)} returned {@code + * false}. */ public abstract long docValueCount(); diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene90/TestLucene90DocValuesFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene90/TestLucene90DocValuesFormat.java index 2a11fefe0af..ca8fbb14e5b 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/lucene90/TestLucene90DocValuesFormat.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene90/TestLucene90DocValuesFormat.java @@ -265,6 +265,7 @@ public class TestLucene90DocValuesFormat extends BaseCompressingDocValuesFormatT assertTrue(valueSet.contains(sortedNumeric.nextValue())); } assertEquals(i, sortedSet.nextDoc()); + assertEquals(valueSet.size(), sortedSet.docValueCount()); int sortedSetCount = 0; while (true) { long ord = sortedSet.nextOrd(); @@ -488,6 +489,7 @@ public class TestLucene90DocValuesFormat extends BaseCompressingDocValuesFormatT for (int i = 0; i < maxDoc; ++i) { assertEquals(i, values.nextDoc()); final int numValues = in.readVInt(); + assertEquals(numValues, values.docValueCount()); for (int j = 0; j < numValues; ++j) { b.setLength(in.readVInt()); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestMultiDocValues.java b/lucene/core/src/test/org/apache/lucene/index/TestMultiDocValues.java index 018d7592bf1..9672fdac034 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestMultiDocValues.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestMultiDocValues.java @@ -279,6 +279,7 @@ public class TestMultiDocValues extends LuceneTestCase { if (docID == NO_MORE_DOCS) { break; } + assertEquals(single.docValueCount(), multi.docValueCount()); ArrayList expectedList = new ArrayList<>(); long ord; diff --git a/lucene/core/src/test/org/apache/lucene/index/TestSortingCodecReader.java b/lucene/core/src/test/org/apache/lucene/index/TestSortingCodecReader.java index 09a8e9ee36e..42a898d87b4 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestSortingCodecReader.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestSortingCodecReader.java @@ -230,6 +230,7 @@ public class TestSortingCodecReader extends LuceneTestCase { assertEquals( new BytesRef(ids.longValue() + ""), sorted_set_dv.lookupOrd(sorted_set_dv.nextOrd())); + assertEquals(1, sorted_set_dv.docValueCount()); assertEquals(1, sorted_numeric_dv.docValueCount()); assertEquals(ids.longValue(), sorted_numeric_dv.nextValue()); diff --git a/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java b/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java index 4ce329a62ea..206c66b2516 100644 --- a/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java +++ b/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java @@ -297,6 +297,7 @@ public class TestMemoryIndex extends LuceneTestCase { SortedSetDocValues sortedSetDocValues = leafReader.getSortedSetDocValues("sorted_set"); assertEquals(3, sortedSetDocValues.getValueCount()); assertEquals(0, sortedSetDocValues.nextDoc()); + assertEquals(3, sortedSetDocValues.docValueCount()); assertEquals(0L, sortedSetDocValues.nextOrd()); assertEquals(1L, sortedSetDocValues.nextOrd()); assertEquals(2L, sortedSetDocValues.nextOrd());