LUCENE-10603: Migrate remaining SSDV iteration to use docValueCount in production code (#995)

2022-06-30 14:01:14 -07:00 · 2022-06-30 14:01:14 -07:00 · 5f2a4998a0
parent e05b3ec7de
commit 5f2a4998a0
17 changed files with 54 additions and 118 deletions
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@ -95,7 +95,7 @@ Improvements
 * LUCENE-10585: Facet module code cleanup (copy/paste scrubbing, simplification and some very minor
  optimization tweaks). (Greg Miller)

-* LUCENE-10603: Update SortedSetDocValues iteration within faceting implementations to use
+* LUCENE-10603: Update SortedSetDocValues iteration within production code to use
  SortedSetDocValues#docValueCount(). (Greg Miller)

 * GITHUB#983: AbstractSortedSetDocValueFacetCounts internal code cleanup/refactoring. (Greg Miller)
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/Lucene80DocValuesConsumer.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/Lucene80DocValuesConsumer.java
@ -959,11 +959,7 @@ final class Lucene80DocValuesConsumer extends DocValuesConsumer {
    long numOrds = 0;
    for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
      numDocsWithField++;
-      for (long ord = values.nextOrd();
-          ord != SortedSetDocValues.NO_MORE_ORDS;
-          ord = values.nextOrd()) {
-        numOrds++;
-      }
+      numOrds += values.docValueCount();
    }

    if (numDocsWithField == numOrds) {
@ -1005,10 +1001,8 @@ final class Lucene80DocValuesConsumer extends DocValuesConsumer {
    LegacyDirectWriter writer = LegacyDirectWriter.getInstance(data, numOrds, numberOfBitsPerOrd);
    values = valuesProducer.getSortedSet(field);
    for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
-      for (long ord = values.nextOrd();
-          ord != SortedSetDocValues.NO_MORE_ORDS;
-          ord = values.nextOrd()) {
-        writer.add(ord);
+      for (int i = 0; i < values.docValueCount(); i++) {
+        writer.add(values.nextOrd());
      }
    }
    writer.finish();
@ -1026,11 +1020,7 @@ final class Lucene80DocValuesConsumer extends DocValuesConsumer {
    addressesWriter.add(addr);
    values = valuesProducer.getSortedSet(field);
    for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
-      values.nextOrd();
-      addr++;
-      while (values.nextOrd() != SortedSetDocValues.NO_MORE_ORDS) {
-        addr++;
-      }
+      addr += values.docValueCount();
      addressesWriter.add(addr);
    }
    addressesWriter.finish();
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesWriter.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesWriter.java
@ -434,13 +434,11 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer {
    SortedSetDocValues values = valuesProducer.getSortedSet(field);
    for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
      sb2.setLength(0);
-      for (long ord = values.nextOrd();
-          ord != SortedSetDocValues.NO_MORE_ORDS;
-          ord = values.nextOrd()) {
+      for (int i = 0; i < values.docValueCount(); i++) {
        if (sb2.length() > 0) {
          sb2.append(",");
        }
-        sb2.append(Long.toString(ord));
+        sb2.append(Long.toString(values.nextOrd()));
      }
      maxOrdListLength = Math.max(maxOrdListLength, sb2.length());
    }
@ -490,13 +488,11 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer {
      }
      sb2.setLength(0);
      if (values.docID() == i) {
-        for (long ord = values.nextOrd();
-            ord != SortedSetDocValues.NO_MORE_ORDS;
-            ord = values.nextOrd()) {
+        for (int j = 0; j < values.docValueCount(); j++) {
          if (sb2.length() > 0) {
            sb2.append(",");
          }
-          sb2.append(Long.toString(ord));
+          sb2.append(Long.toString(values.nextOrd()));
        }
      }
      // now pad to fit: these are numbers so spaces work well. reader calls trim()
--- a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java
@ -836,9 +836,8 @@ public abstract class DocValuesConsumer implements Closeable {
        int docID;
        while ((docID = dv.nextDoc()) != NO_MORE_DOCS) {
          if (liveDocs.get(docID)) {
-            long ord;
-            while ((ord = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
-              bitset.set(ord);
+            for (int i = 0; i < dv.docValueCount(); i++) {
+              bitset.set(dv.nextOrd());
            }
          }
        }
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesConsumer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesConsumer.java
@ -739,9 +739,9 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {

    assert values.docID() == -1;
    for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
-      final long firstOrd = values.nextOrd();
-      assert firstOrd != SortedSetDocValues.NO_MORE_ORDS;
-      if (values.nextOrd() != SortedSetDocValues.NO_MORE_ORDS) {
+      int docValueCount = values.docValueCount();
+      assert docValueCount > 0;
+      if (docValueCount > 1) {
        return false;
      }
    }
--- a/lucene/core/src/java/org/apache/lucene/document/SortedSetDocValuesRangeQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/document/SortedSetDocValuesRangeQuery.java
@ -176,9 +176,8 @@ abstract class SortedSetDocValuesRangeQuery extends Query {
              new TwoPhaseIterator(values) {
                @Override
                public boolean matches() throws IOException {
-                  for (long ord = values.nextOrd();
-                      ord != SortedSetDocValues.NO_MORE_ORDS;
-                      ord = values.nextOrd()) {
+                  for (int i = 0; i < values.docValueCount(); i++) {
+                    long ord = values.nextOrd();
                    if (ord < minOrd) {
                      continue;
                    }
--- a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
+++ b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
@ -3354,9 +3354,8 @@ public final class CheckIndex implements Closeable {
            "advanceExact reports different value count: " + count + " != " + count2);
      }
      long lastOrd = -1;
-      long ord;
      int ordCount = 0;
-      while ((ord = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
+      for (int i = 0; i < count; i++) {
        if (count != dv.docValueCount()) {
          throw new CheckIndexException(
              "value count changed from "
@ -3365,6 +3364,7 @@ public final class CheckIndex implements Closeable {
                  + dv.docValueCount()
                  + " during iterating over all values");
        }
+        long ord = dv.nextOrd();
        long ord2 = dv2.nextOrd();
        if (ord != ord2) {
          throw new CheckIndexException(
@ -3393,11 +3393,6 @@ public final class CheckIndex implements Closeable {
        throw new CheckIndexException(
            "dv for field: " + fieldName + " returned docID=" + docID + " yet has no ordinals");
      }
-      long ord2 = dv2.nextOrd();
-      if (ord != ord2) {
-        throw new CheckIndexException(
-            "nextDoc and advanceExact report different ords: " + ord + " != " + ord2);
-      }
    }
    if (maxOrd != maxOrd2) {
      throw new CheckIndexException(
--- a/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesWriter.java
@ -16,7 +16,6 @@
 */
 package org.apache.lucene.index;

-import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS;
 import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
 import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SIZE;

@ -450,10 +449,10 @@ class SortedSetDocValuesWriter extends DocValuesWriter<SortedSetDocValues> {
      while ((docID = oldValues.nextDoc()) != NO_MORE_DOCS) {
        int newDocID = sortMap.oldToNew(docID);
        long startOffset = ordOffset;
-        long ord;
-        while ((ord = oldValues.nextOrd()) != NO_MORE_ORDS) {
-          builder.add(ord);
-          ordOffset++;
+        int docValueCount = oldValues.docValueCount();
+        ordOffset += docValueCount;
+        for (int i = 0; i < docValueCount; i++) {
+          builder.add(oldValues.nextOrd());
        }
        docValueCounts.set(newDocID, ordOffset - startOffset);
        if (startOffset != ordOffset) { // do we have any values?
--- a/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java
+++ b/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java
@ -169,10 +169,8 @@ public final class DocValuesRewriteMethod extends MultiTermQuery.RewriteMethod {

                @Override
                public boolean matches() throws IOException {
-                  for (long ord = fcsi.nextOrd();
-                      ord != SortedSetDocValues.NO_MORE_ORDS;
-                      ord = fcsi.nextOrd()) {
-                    if (termSet.get(ord)) {
+                  for (int i = 0; i < fcsi.docValueCount(); i++) {
+                    if (termSet.get(fcsi.nextOrd())) {
                      return true;
                    }
                  }
--- a/lucene/core/src/java/org/apache/lucene/search/SortedSetSelector.java
+++ b/lucene/core/src/java/org/apache/lucene/search/SortedSetSelector.java
@ -22,7 +22,6 @@ import java.io.IOException;
 import org.apache.lucene.index.DocValues;
 import org.apache.lucene.index.SortedDocValues;
 import org.apache.lucene.index.SortedSetDocValues;
-import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;

 /** Selects a value from the document's set to use as the representative value */
@ -226,13 +225,11 @@ public class SortedSetSelector {

    private void setOrd() throws IOException {
      if (docID() != NO_MORE_DOCS) {
-        while (true) {
-          long nextOrd = in.nextOrd();
-          if (nextOrd == NO_MORE_ORDS) {
-            break;
-          }
-          ord = (int) nextOrd;
+        int docValueCount = in.docValueCount();
+        for (int i = 0; i < docValueCount - 1; i++) {
+          in.nextOrd();
        }
+        ord = (int) in.nextOrd();
      } else {
        ord = (int) NO_MORE_ORDS;
      }
@ -243,7 +240,6 @@ public class SortedSetSelector {
  static class MiddleMinValue extends SortedDocValues {
    final SortedSetDocValues in;
    private int ord;
-    private int[] ords = new int[8];

    MiddleMinValue(SortedSetDocValues in) {
      this.in = in;
@ -304,25 +300,12 @@ public class SortedSetSelector {

    private void setOrd() throws IOException {
      if (docID() != NO_MORE_DOCS) {
-        int upto = 0;
-        while (true) {
-          long nextOrd = in.nextOrd();
-          if (nextOrd == NO_MORE_ORDS) {
-            break;
-          }
-          if (upto == ords.length) {
-            ords = ArrayUtil.grow(ords);
-          }
-          ords[upto++] = (int) nextOrd;
-        }
-
-        if (upto == 0) {
-          // iterator should not have returned this docID if it has no ords:
-          assert false;
-          ord = (int) NO_MORE_ORDS;
-        } else {
-          ord = ords[(upto - 1) >>> 1];
+        int docValueCount = in.docValueCount();
+        int targetIdx = (docValueCount - 1) >>> 1;
+        for (int i = 0; i < targetIdx; i++) {
+          in.nextOrd();
        }
+        ord = (int) in.nextOrd();
      } else {
        ord = (int) NO_MORE_ORDS;
      }
@ -333,7 +316,6 @@ public class SortedSetSelector {
  static class MiddleMaxValue extends SortedDocValues {
    final SortedSetDocValues in;
    private int ord;
-    private int[] ords = new int[8];

    MiddleMaxValue(SortedSetDocValues in) {
      this.in = in;
@ -394,25 +376,12 @@ public class SortedSetSelector {

    private void setOrd() throws IOException {
      if (docID() != NO_MORE_DOCS) {
-        int upto = 0;
-        while (true) {
-          long nextOrd = in.nextOrd();
-          if (nextOrd == NO_MORE_ORDS) {
-            break;
-          }
-          if (upto == ords.length) {
-            ords = ArrayUtil.grow(ords);
-          }
-          ords[upto++] = (int) nextOrd;
-        }
-
-        if (upto == 0) {
-          // iterator should not have returned this docID if it has no ords:
-          assert false;
-          ord = (int) NO_MORE_ORDS;
-        } else {
-          ord = ords[upto >>> 1];
+        int docValueCount = in.docValueCount();
+        int targetIdx = docValueCount >>> 1;
+        for (int i = 0; i < targetIdx; i++) {
+          in.nextOrd();
        }
+        ord = (int) in.nextOrd();
      } else {
        ord = (int) NO_MORE_ORDS;
      }
--- a/lucene/grouping/src/java/org/apache/lucene/search/grouping/TermGroupFacetCollector.java
+++ b/lucene/grouping/src/java/org/apache/lucene/search/grouping/TermGroupFacetCollector.java
@ -275,9 +275,8 @@ public abstract class TermGroupFacetCollector extends GroupFacetCollector {
      }
      boolean empty = true;
      if (doc == facetFieldDocTermOrds.docID()) {
-        long ord;
-        while ((ord = facetFieldDocTermOrds.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
-          process(groupOrd, (int) ord);
+        for (int i = 0; i < facetFieldDocTermOrds.docValueCount(); i++) {
+          process(groupOrd, (int) facetFieldDocTermOrds.nextOrd());
          empty = false;
        }
      }
--- a/lucene/join/src/java/org/apache/lucene/search/join/TermsCollector.java
+++ b/lucene/join/src/java/org/apache/lucene/search/join/TermsCollector.java
@ -62,13 +62,12 @@ abstract class TermsCollector<DV> extends DocValuesTermsCollector<DV> {

    @Override
    public void collect(int doc) throws IOException {
-      long ord;
      if (doc > docValues.docID()) {
        docValues.advance(doc);
      }
      if (doc == docValues.docID()) {
-        while ((ord = docValues.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
-          final BytesRef term = docValues.lookupOrd(ord);
+        for (int i = 0; i < docValues.docValueCount(); i++) {
+          final BytesRef term = docValues.lookupOrd(docValues.nextOrd());
          collectorTerms.add(term);
        }
      }
--- a/lucene/join/src/java/org/apache/lucene/search/join/TermsWithScoreCollector.java
+++ b/lucene/join/src/java/org/apache/lucene/search/join/TermsWithScoreCollector.java
@ -213,9 +213,8 @@ abstract class TermsWithScoreCollector<DV> extends DocValuesTermsCollector<DV>
    @Override
    public void collect(int doc) throws IOException {
      if (docValues.advanceExact(doc)) {
-        long ord;
-        while ((ord = docValues.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
-          int termID = collectedTerms.add(docValues.lookupOrd(ord));
+        for (int i = 0; i < docValues.docValueCount(); i++) {
+          int termID = collectedTerms.add(docValues.lookupOrd(docValues.nextOrd()));
          if (termID < 0) {
            termID = -termID - 1;
          } else {
@ -260,9 +259,8 @@ abstract class TermsWithScoreCollector<DV> extends DocValuesTermsCollector<DV>
      @Override
      public void collect(int doc) throws IOException {
        if (docValues.advanceExact(doc)) {
-          long ord;
-          while ((ord = docValues.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
-            int termID = collectedTerms.add(docValues.lookupOrd(ord));
+          for (int i = 0; i < docValues.docValueCount(); i++) {
+            int termID = collectedTerms.add(docValues.lookupOrd(docValues.nextOrd()));
            if (termID < 0) {
              termID = -termID - 1;
            } else {
--- a/lucene/luke/src/java/org/apache/lucene/luke/models/documents/DocValuesAdapter.java
+++ b/lucene/luke/src/java/org/apache/lucene/luke/models/documents/DocValuesAdapter.java
@ -143,9 +143,8 @@ final class DocValuesAdapter {
    if (ssvalues.advanceExact(docid)) {
      List<BytesRef> values = new ArrayList<>();

-      long ord;
-      while ((ord = ssvalues.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
-        values.add(BytesRef.deepCopyOf(ssvalues.lookupOrd(ord)));
+      for (int i = 0; i < ssvalues.docValueCount(); i++) {
+        values.add(BytesRef.deepCopyOf(ssvalues.lookupOrd(ssvalues.nextOrd())));
      }

      DocValues dv = DocValues.of(dvType, values, Collections.emptyList());
--- a/lucene/misc/src/java/org/apache/lucene/misc/search/DocValuesStats.java
+++ b/lucene/misc/src/java/org/apache/lucene/misc/search/DocValuesStats.java
@ -403,9 +403,8 @@ public abstract class DocValuesStats<T> {

    @Override
    protected void doAccumulate(int count) throws IOException {
-      long ord;
-      while ((ord = ssdv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
-        BytesRef val = ssdv.lookupOrd(ord);
+      for (int i = 0; i < ssdv.docValueCount(); i++) {
+        BytesRef val = ssdv.lookupOrd(ssdv.nextOrd());
        if (max == null || val.compareTo(max) > 0) {
          max = copyFrom(val, max);
        }
--- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/DocValuesTermsQuery.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/DocValuesTermsQuery.java
@ -210,10 +210,8 @@ public class DocValuesTermsQuery extends Query implements Accountable {

              @Override
              public boolean matches() throws IOException {
-                for (long ord = values.nextOrd();
-                    ord != SortedSetDocValues.NO_MORE_ORDS;
-                    ord = values.nextOrd()) {
-                  if (bits.get(ord)) {
+                for (int i = 0; i < values.docValueCount(); i++) {
+                  if (bits.get(values.nextOrd())) {
                    return true;
                  }
                }
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java
@ -811,9 +811,8 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
        contexts = new HashSet<BytesRef>();
        int targetDocID = fd.doc - leaves.get(segment).docBase;
        if (contextsDV.advance(targetDocID) == targetDocID) {
-          long ord;
-          while ((ord = contextsDV.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
-            BytesRef context = BytesRef.deepCopyOf(contextsDV.lookupOrd(ord));
+          for (int j = 0; j < contextsDV.docValueCount(); j++) {
+            BytesRef context = BytesRef.deepCopyOf(contextsDV.lookupOrd(contextsDV.nextOrd()));
            contexts.add(context);
          }
        }