From 5f2a4998a079278ada89ce7bfa3992673a91c5c9 Mon Sep 17 00:00:00 2001 From: Greg Miller Date: Thu, 30 Jun 2022 14:01:14 -0700 Subject: [PATCH] LUCENE-10603: Migrate remaining SSDV iteration to use docValueCount in production code (#995) --- lucene/CHANGES.txt | 2 +- .../lucene80/Lucene80DocValuesConsumer.java | 18 ++---- .../simpletext/SimpleTextDocValuesWriter.java | 12 ++-- .../lucene/codecs/DocValuesConsumer.java | 5 +- .../lucene90/Lucene90DocValuesConsumer.java | 6 +- .../SortedSetDocValuesRangeQuery.java | 5 +- .../org/apache/lucene/index/CheckIndex.java | 9 +-- .../index/SortedSetDocValuesWriter.java | 9 ++- .../lucene/search/DocValuesRewriteMethod.java | 6 +- .../lucene/search/SortedSetSelector.java | 59 +++++-------------- .../grouping/TermGroupFacetCollector.java | 5 +- .../lucene/search/join/TermsCollector.java | 5 +- .../search/join/TermsWithScoreCollector.java | 10 ++-- .../models/documents/DocValuesAdapter.java | 5 +- .../lucene/misc/search/DocValuesStats.java | 5 +- .../sandbox/search/DocValuesTermsQuery.java | 6 +- .../analyzing/AnalyzingInfixSuggester.java | 5 +- 17 files changed, 54 insertions(+), 118 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 26faf461e76..4b86a89b2d7 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -95,7 +95,7 @@ Improvements * LUCENE-10585: Facet module code cleanup (copy/paste scrubbing, simplification and some very minor optimization tweaks). (Greg Miller) -* LUCENE-10603: Update SortedSetDocValues iteration within faceting implementations to use +* LUCENE-10603: Update SortedSetDocValues iteration within production code to use SortedSetDocValues#docValueCount(). (Greg Miller) * GITHUB#983: AbstractSortedSetDocValueFacetCounts internal code cleanup/refactoring. (Greg Miller) diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/Lucene80DocValuesConsumer.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/Lucene80DocValuesConsumer.java index f9a53311189..e7eeb0fa027 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/Lucene80DocValuesConsumer.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/Lucene80DocValuesConsumer.java @@ -959,11 +959,7 @@ final class Lucene80DocValuesConsumer extends DocValuesConsumer { long numOrds = 0; for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) { numDocsWithField++; - for (long ord = values.nextOrd(); - ord != SortedSetDocValues.NO_MORE_ORDS; - ord = values.nextOrd()) { - numOrds++; - } + numOrds += values.docValueCount(); } if (numDocsWithField == numOrds) { @@ -1005,10 +1001,8 @@ final class Lucene80DocValuesConsumer extends DocValuesConsumer { LegacyDirectWriter writer = LegacyDirectWriter.getInstance(data, numOrds, numberOfBitsPerOrd); values = valuesProducer.getSortedSet(field); for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) { - for (long ord = values.nextOrd(); - ord != SortedSetDocValues.NO_MORE_ORDS; - ord = values.nextOrd()) { - writer.add(ord); + for (int i = 0; i < values.docValueCount(); i++) { + writer.add(values.nextOrd()); } } writer.finish(); @@ -1026,11 +1020,7 @@ final class Lucene80DocValuesConsumer extends DocValuesConsumer { addressesWriter.add(addr); values = valuesProducer.getSortedSet(field); for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) { - values.nextOrd(); - addr++; - while (values.nextOrd() != SortedSetDocValues.NO_MORE_ORDS) { - addr++; - } + addr += values.docValueCount(); addressesWriter.add(addr); } addressesWriter.finish(); diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesWriter.java index bce8bd03aa1..bd0c307acc6 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesWriter.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesWriter.java @@ -434,13 +434,11 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer { SortedSetDocValues values = valuesProducer.getSortedSet(field); for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) { sb2.setLength(0); - for (long ord = values.nextOrd(); - ord != SortedSetDocValues.NO_MORE_ORDS; - ord = values.nextOrd()) { + for (int i = 0; i < values.docValueCount(); i++) { if (sb2.length() > 0) { sb2.append(","); } - sb2.append(Long.toString(ord)); + sb2.append(Long.toString(values.nextOrd())); } maxOrdListLength = Math.max(maxOrdListLength, sb2.length()); } @@ -490,13 +488,11 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer { } sb2.setLength(0); if (values.docID() == i) { - for (long ord = values.nextOrd(); - ord != SortedSetDocValues.NO_MORE_ORDS; - ord = values.nextOrd()) { + for (int j = 0; j < values.docValueCount(); j++) { if (sb2.length() > 0) { sb2.append(","); } - sb2.append(Long.toString(ord)); + sb2.append(Long.toString(values.nextOrd())); } } // now pad to fit: these are numbers so spaces work well. reader calls trim() diff --git a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java index 5e24cf43b3c..8795d52fe1b 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java @@ -836,9 +836,8 @@ public abstract class DocValuesConsumer implements Closeable { int docID; while ((docID = dv.nextDoc()) != NO_MORE_DOCS) { if (liveDocs.get(docID)) { - long ord; - while ((ord = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { - bitset.set(ord); + for (int i = 0; i < dv.docValueCount(); i++) { + bitset.set(dv.nextOrd()); } } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesConsumer.java index 494d0edc3bc..60f566a888d 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesConsumer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesConsumer.java @@ -739,9 +739,9 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer { assert values.docID() == -1; for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) { - final long firstOrd = values.nextOrd(); - assert firstOrd != SortedSetDocValues.NO_MORE_ORDS; - if (values.nextOrd() != SortedSetDocValues.NO_MORE_ORDS) { + int docValueCount = values.docValueCount(); + assert docValueCount > 0; + if (docValueCount > 1) { return false; } } diff --git a/lucene/core/src/java/org/apache/lucene/document/SortedSetDocValuesRangeQuery.java b/lucene/core/src/java/org/apache/lucene/document/SortedSetDocValuesRangeQuery.java index d14784186b1..776ef988f28 100644 --- a/lucene/core/src/java/org/apache/lucene/document/SortedSetDocValuesRangeQuery.java +++ b/lucene/core/src/java/org/apache/lucene/document/SortedSetDocValuesRangeQuery.java @@ -176,9 +176,8 @@ abstract class SortedSetDocValuesRangeQuery extends Query { new TwoPhaseIterator(values) { @Override public boolean matches() throws IOException { - for (long ord = values.nextOrd(); - ord != SortedSetDocValues.NO_MORE_ORDS; - ord = values.nextOrd()) { + for (int i = 0; i < values.docValueCount(); i++) { + long ord = values.nextOrd(); if (ord < minOrd) { continue; } diff --git a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java index 83b31ea1ece..193cb4f2687 100644 --- a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java +++ b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java @@ -3354,9 +3354,8 @@ public final class CheckIndex implements Closeable { "advanceExact reports different value count: " + count + " != " + count2); } long lastOrd = -1; - long ord; int ordCount = 0; - while ((ord = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { + for (int i = 0; i < count; i++) { if (count != dv.docValueCount()) { throw new CheckIndexException( "value count changed from " @@ -3365,6 +3364,7 @@ public final class CheckIndex implements Closeable { + dv.docValueCount() + " during iterating over all values"); } + long ord = dv.nextOrd(); long ord2 = dv2.nextOrd(); if (ord != ord2) { throw new CheckIndexException( @@ -3393,11 +3393,6 @@ public final class CheckIndex implements Closeable { throw new CheckIndexException( "dv for field: " + fieldName + " returned docID=" + docID + " yet has no ordinals"); } - long ord2 = dv2.nextOrd(); - if (ord != ord2) { - throw new CheckIndexException( - "nextDoc and advanceExact report different ords: " + ord + " != " + ord2); - } } if (maxOrd != maxOrd2) { throw new CheckIndexException( diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesWriter.java b/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesWriter.java index fedc0bf6b3b..8b0fadafc7f 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesWriter.java @@ -16,7 +16,6 @@ */ package org.apache.lucene.index; -import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS; import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SIZE; @@ -450,10 +449,10 @@ class SortedSetDocValuesWriter extends DocValuesWriter { while ((docID = oldValues.nextDoc()) != NO_MORE_DOCS) { int newDocID = sortMap.oldToNew(docID); long startOffset = ordOffset; - long ord; - while ((ord = oldValues.nextOrd()) != NO_MORE_ORDS) { - builder.add(ord); - ordOffset++; + int docValueCount = oldValues.docValueCount(); + ordOffset += docValueCount; + for (int i = 0; i < docValueCount; i++) { + builder.add(oldValues.nextOrd()); } docValueCounts.set(newDocID, ordOffset - startOffset); if (startOffset != ordOffset) { // do we have any values? diff --git a/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java b/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java index f1dadb5c7c7..51d0dd7078c 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java +++ b/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java @@ -169,10 +169,8 @@ public final class DocValuesRewriteMethod extends MultiTermQuery.RewriteMethod { @Override public boolean matches() throws IOException { - for (long ord = fcsi.nextOrd(); - ord != SortedSetDocValues.NO_MORE_ORDS; - ord = fcsi.nextOrd()) { - if (termSet.get(ord)) { + for (int i = 0; i < fcsi.docValueCount(); i++) { + if (termSet.get(fcsi.nextOrd())) { return true; } } diff --git a/lucene/core/src/java/org/apache/lucene/search/SortedSetSelector.java b/lucene/core/src/java/org/apache/lucene/search/SortedSetSelector.java index 6f96ab87347..fcf5fba2d8a 100644 --- a/lucene/core/src/java/org/apache/lucene/search/SortedSetSelector.java +++ b/lucene/core/src/java/org/apache/lucene/search/SortedSetSelector.java @@ -22,7 +22,6 @@ import java.io.IOException; import org.apache.lucene.index.DocValues; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedSetDocValues; -import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BytesRef; /** Selects a value from the document's set to use as the representative value */ @@ -226,13 +225,11 @@ public class SortedSetSelector { private void setOrd() throws IOException { if (docID() != NO_MORE_DOCS) { - while (true) { - long nextOrd = in.nextOrd(); - if (nextOrd == NO_MORE_ORDS) { - break; - } - ord = (int) nextOrd; + int docValueCount = in.docValueCount(); + for (int i = 0; i < docValueCount - 1; i++) { + in.nextOrd(); } + ord = (int) in.nextOrd(); } else { ord = (int) NO_MORE_ORDS; } @@ -243,7 +240,6 @@ public class SortedSetSelector { static class MiddleMinValue extends SortedDocValues { final SortedSetDocValues in; private int ord; - private int[] ords = new int[8]; MiddleMinValue(SortedSetDocValues in) { this.in = in; @@ -304,25 +300,12 @@ public class SortedSetSelector { private void setOrd() throws IOException { if (docID() != NO_MORE_DOCS) { - int upto = 0; - while (true) { - long nextOrd = in.nextOrd(); - if (nextOrd == NO_MORE_ORDS) { - break; - } - if (upto == ords.length) { - ords = ArrayUtil.grow(ords); - } - ords[upto++] = (int) nextOrd; - } - - if (upto == 0) { - // iterator should not have returned this docID if it has no ords: - assert false; - ord = (int) NO_MORE_ORDS; - } else { - ord = ords[(upto - 1) >>> 1]; + int docValueCount = in.docValueCount(); + int targetIdx = (docValueCount - 1) >>> 1; + for (int i = 0; i < targetIdx; i++) { + in.nextOrd(); } + ord = (int) in.nextOrd(); } else { ord = (int) NO_MORE_ORDS; } @@ -333,7 +316,6 @@ public class SortedSetSelector { static class MiddleMaxValue extends SortedDocValues { final SortedSetDocValues in; private int ord; - private int[] ords = new int[8]; MiddleMaxValue(SortedSetDocValues in) { this.in = in; @@ -394,25 +376,12 @@ public class SortedSetSelector { private void setOrd() throws IOException { if (docID() != NO_MORE_DOCS) { - int upto = 0; - while (true) { - long nextOrd = in.nextOrd(); - if (nextOrd == NO_MORE_ORDS) { - break; - } - if (upto == ords.length) { - ords = ArrayUtil.grow(ords); - } - ords[upto++] = (int) nextOrd; - } - - if (upto == 0) { - // iterator should not have returned this docID if it has no ords: - assert false; - ord = (int) NO_MORE_ORDS; - } else { - ord = ords[upto >>> 1]; + int docValueCount = in.docValueCount(); + int targetIdx = docValueCount >>> 1; + for (int i = 0; i < targetIdx; i++) { + in.nextOrd(); } + ord = (int) in.nextOrd(); } else { ord = (int) NO_MORE_ORDS; } diff --git a/lucene/grouping/src/java/org/apache/lucene/search/grouping/TermGroupFacetCollector.java b/lucene/grouping/src/java/org/apache/lucene/search/grouping/TermGroupFacetCollector.java index 96c948db872..c1b49758b13 100644 --- a/lucene/grouping/src/java/org/apache/lucene/search/grouping/TermGroupFacetCollector.java +++ b/lucene/grouping/src/java/org/apache/lucene/search/grouping/TermGroupFacetCollector.java @@ -275,9 +275,8 @@ public abstract class TermGroupFacetCollector extends GroupFacetCollector { } boolean empty = true; if (doc == facetFieldDocTermOrds.docID()) { - long ord; - while ((ord = facetFieldDocTermOrds.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { - process(groupOrd, (int) ord); + for (int i = 0; i < facetFieldDocTermOrds.docValueCount(); i++) { + process(groupOrd, (int) facetFieldDocTermOrds.nextOrd()); empty = false; } } diff --git a/lucene/join/src/java/org/apache/lucene/search/join/TermsCollector.java b/lucene/join/src/java/org/apache/lucene/search/join/TermsCollector.java index de859374a82..cf3561e6b2e 100644 --- a/lucene/join/src/java/org/apache/lucene/search/join/TermsCollector.java +++ b/lucene/join/src/java/org/apache/lucene/search/join/TermsCollector.java @@ -62,13 +62,12 @@ abstract class TermsCollector extends DocValuesTermsCollector { @Override public void collect(int doc) throws IOException { - long ord; if (doc > docValues.docID()) { docValues.advance(doc); } if (doc == docValues.docID()) { - while ((ord = docValues.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { - final BytesRef term = docValues.lookupOrd(ord); + for (int i = 0; i < docValues.docValueCount(); i++) { + final BytesRef term = docValues.lookupOrd(docValues.nextOrd()); collectorTerms.add(term); } } diff --git a/lucene/join/src/java/org/apache/lucene/search/join/TermsWithScoreCollector.java b/lucene/join/src/java/org/apache/lucene/search/join/TermsWithScoreCollector.java index 00559d2f6ed..84252a7b15a 100644 --- a/lucene/join/src/java/org/apache/lucene/search/join/TermsWithScoreCollector.java +++ b/lucene/join/src/java/org/apache/lucene/search/join/TermsWithScoreCollector.java @@ -213,9 +213,8 @@ abstract class TermsWithScoreCollector extends DocValuesTermsCollector @Override public void collect(int doc) throws IOException { if (docValues.advanceExact(doc)) { - long ord; - while ((ord = docValues.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { - int termID = collectedTerms.add(docValues.lookupOrd(ord)); + for (int i = 0; i < docValues.docValueCount(); i++) { + int termID = collectedTerms.add(docValues.lookupOrd(docValues.nextOrd())); if (termID < 0) { termID = -termID - 1; } else { @@ -260,9 +259,8 @@ abstract class TermsWithScoreCollector extends DocValuesTermsCollector @Override public void collect(int doc) throws IOException { if (docValues.advanceExact(doc)) { - long ord; - while ((ord = docValues.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { - int termID = collectedTerms.add(docValues.lookupOrd(ord)); + for (int i = 0; i < docValues.docValueCount(); i++) { + int termID = collectedTerms.add(docValues.lookupOrd(docValues.nextOrd())); if (termID < 0) { termID = -termID - 1; } else { diff --git a/lucene/luke/src/java/org/apache/lucene/luke/models/documents/DocValuesAdapter.java b/lucene/luke/src/java/org/apache/lucene/luke/models/documents/DocValuesAdapter.java index a61b72950c6..251a9375142 100644 --- a/lucene/luke/src/java/org/apache/lucene/luke/models/documents/DocValuesAdapter.java +++ b/lucene/luke/src/java/org/apache/lucene/luke/models/documents/DocValuesAdapter.java @@ -143,9 +143,8 @@ final class DocValuesAdapter { if (ssvalues.advanceExact(docid)) { List values = new ArrayList<>(); - long ord; - while ((ord = ssvalues.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { - values.add(BytesRef.deepCopyOf(ssvalues.lookupOrd(ord))); + for (int i = 0; i < ssvalues.docValueCount(); i++) { + values.add(BytesRef.deepCopyOf(ssvalues.lookupOrd(ssvalues.nextOrd()))); } DocValues dv = DocValues.of(dvType, values, Collections.emptyList()); diff --git a/lucene/misc/src/java/org/apache/lucene/misc/search/DocValuesStats.java b/lucene/misc/src/java/org/apache/lucene/misc/search/DocValuesStats.java index 55b04327b3d..88053685878 100644 --- a/lucene/misc/src/java/org/apache/lucene/misc/search/DocValuesStats.java +++ b/lucene/misc/src/java/org/apache/lucene/misc/search/DocValuesStats.java @@ -403,9 +403,8 @@ public abstract class DocValuesStats { @Override protected void doAccumulate(int count) throws IOException { - long ord; - while ((ord = ssdv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { - BytesRef val = ssdv.lookupOrd(ord); + for (int i = 0; i < ssdv.docValueCount(); i++) { + BytesRef val = ssdv.lookupOrd(ssdv.nextOrd()); if (max == null || val.compareTo(max) > 0) { max = copyFrom(val, max); } diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/DocValuesTermsQuery.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/DocValuesTermsQuery.java index 68404e8d0fa..b95a7b2114c 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/DocValuesTermsQuery.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/DocValuesTermsQuery.java @@ -210,10 +210,8 @@ public class DocValuesTermsQuery extends Query implements Accountable { @Override public boolean matches() throws IOException { - for (long ord = values.nextOrd(); - ord != SortedSetDocValues.NO_MORE_ORDS; - ord = values.nextOrd()) { - if (bits.get(ord)) { + for (int i = 0; i < values.docValueCount(); i++) { + if (bits.get(values.nextOrd())) { return true; } } diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java index 43e93990ae4..0c73fe1b69a 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java @@ -811,9 +811,8 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable { contexts = new HashSet(); int targetDocID = fd.doc - leaves.get(segment).docBase; if (contextsDV.advance(targetDocID) == targetDocID) { - long ord; - while ((ord = contextsDV.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { - BytesRef context = BytesRef.deepCopyOf(contextsDV.lookupOrd(ord)); + for (int j = 0; j < contextsDV.docValueCount(); j++) { + BytesRef context = BytesRef.deepCopyOf(contextsDV.lookupOrd(contextsDV.nextOrd())); contexts.add(context); } }