Add levels to DocValues skipper index (#13563)

Adding levels to be able to skip several intervals in one step.
Ignacio Vera 2024-07-19 11:20:16 +02:00 committed by GitHub
parent c245ed2fb4
commit 9f991ed07e
7 changed files with 198 additions and 56 deletions
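In rough terms, each added level lets a reader skip exponentially more documents per step: with the defaults introduced below (4096 docs per interval, 8 intervals per level step, 4 levels), a single top-level record summarizes about 2M docs. A small illustrative computation (the class name and printout are ours, not part of the patch):

// Illustrative: docs covered by one skip entry at each level, using the
// defaults added by this commit.
public class SkipLevelCoverage {
  public static void main(String[] args) {
    final int intervalSize = 4096; // DEFAULT_SKIP_INDEX_INTERVAL_SIZE
    final int levelShift = 3;      // SKIP_INDEX_LEVEL_SHIFT -> 8 children per level
    final int maxLevel = 4;        // SKIP_INDEX_MAX_LEVEL
    for (int level = 0; level < maxLevel; level++) {
      long intervals = 1L << (levelShift * level);
      System.out.println("level " + level + ": " + intervals + " intervals = "
          + intervals * intervalSize + " docs per entry");
    }
    // prints 4096, 32768, 262144, 2097152 docs for levels 0..3
  }
}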

lucene/CHANGES.txt

@@ -133,6 +133,8 @@ New Features
DocValuesSkipper abstraction. A new flag is added to FieldType.java that configures whether
to create a "skip index" for doc values. (Ignacio Vera)
* GITHUB#13563: Add levels to doc values skip index. (Ignacio Vera)
Improvements
---------------------

lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesConsumer.java

@@ -19,9 +19,13 @@ package org.apache.lucene.codecs.lucene90;
import static org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT;
import static org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat.NUMERIC_BLOCK_SHIFT;
import static org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat.NUMERIC_BLOCK_SIZE;
import static org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat.SKIP_INDEX_LEVEL_SHIFT;
import static org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat.SKIP_INDEX_MAX_LEVEL;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesProducer;
@@ -43,7 +47,6 @@ import org.apache.lucene.search.SortedSetSelector;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.store.ByteBuffersDataOutput;
import org.apache.lucene.store.ByteBuffersIndexOutput;
-import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
@@ -207,65 +210,130 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
maxValue = Math.max(maxValue, value);
}
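// merges a later accumulator into this one; used by merge() below when
// building the upper levels of the skip index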
void accumulate(SkipAccumulator other) {
assert minDocID <= other.minDocID && maxDocID < other.maxDocID;
maxDocID = other.maxDocID;
minValue = Math.min(minValue, other.minValue);
maxValue = Math.max(maxValue, other.maxValue);
docCount += other.docCount;
}
void nextDoc(int docID) {
maxDocID = docID;
++docCount;
}
-void writeTo(DataOutput output) throws IOException {
-output.writeInt(maxDocID);
-output.writeInt(minDocID);
-output.writeLong(maxValue);
-output.writeLong(minValue);
-output.writeInt(docCount);
public static SkipAccumulator merge(List<SkipAccumulator> list, int index, int length) {
SkipAccumulator acc = new SkipAccumulator(list.get(index).minDocID);
for (int i = 0; i < length; i++) {
acc.accumulate(list.get(index + i));
}
return acc;
}
}
private void writeSkipIndex(FieldInfo field, DocValuesProducer valuesProducer)
throws IOException {
assert field.hasDocValuesSkipIndex();
-// TODO: This disk compression once we introduce levels
-long start = data.getFilePointer();
-SortedNumericDocValues values = valuesProducer.getSortedNumeric(field);
final long start = data.getFilePointer();
final SortedNumericDocValues values = valuesProducer.getSortedNumeric(field);
long globalMaxValue = Long.MIN_VALUE;
long globalMinValue = Long.MAX_VALUE;
int globalDocCount = 0;
int maxDocId = -1;
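// buffer up to 8^(SKIP_INDEX_MAX_LEVEL - 1) = 512 level-0 intervals so that
// writeLevels can emit a complete tree of levels in one pass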
final List<SkipAccumulator> accumulators = new ArrayList<>();
SkipAccumulator accumulator = null;
-int counter = 0;
final int maxAccumulators = 1 << (SKIP_INDEX_LEVEL_SHIFT * (SKIP_INDEX_MAX_LEVEL - 1));
for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
-if (counter == 0) {
if (accumulator == null) {
accumulator = new SkipAccumulator(doc);
accumulators.add(accumulator);
}
accumulator.nextDoc(doc);
for (int i = 0, end = values.docValueCount(); i < end; ++i) {
accumulator.accumulate(values.nextValue());
}
-if (++counter == skipIndexIntervalSize) {
if (accumulator.docCount == skipIndexIntervalSize) {
globalMaxValue = Math.max(globalMaxValue, accumulator.maxValue);
globalMinValue = Math.min(globalMinValue, accumulator.minValue);
globalDocCount += accumulator.docCount;
maxDocId = accumulator.maxDocID;
-accumulator.writeTo(data);
-counter = 0;
accumulator = null;
if (accumulators.size() == maxAccumulators) {
writeLevels(accumulators);
accumulators.clear();
}
}
}
-if (counter > 0) {
-globalMaxValue = Math.max(globalMaxValue, accumulator.maxValue);
-globalMinValue = Math.min(globalMinValue, accumulator.minValue);
-globalDocCount += accumulator.docCount;
-maxDocId = accumulator.maxDocID;
-accumulator.writeTo(data);
if (accumulators.isEmpty() == false) {
if (accumulator != null) {
globalMaxValue = Math.max(globalMaxValue, accumulator.maxValue);
globalMinValue = Math.min(globalMinValue, accumulator.minValue);
globalDocCount += accumulator.docCount;
maxDocId = accumulator.maxDocID;
}
writeLevels(accumulators);
}
meta.writeLong(start); // record the start in meta
meta.writeLong(data.getFilePointer() - start); // record the length
assert globalDocCount == 0 || globalMaxValue >= globalMinValue;
meta.writeLong(globalMaxValue);
meta.writeLong(globalMinValue);
assert globalDocCount <= maxDocId + 1;
meta.writeInt(globalDocCount);
meta.writeInt(maxDocId);
}
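// Builds SKIP_INDEX_MAX_LEVEL - 1 upper levels by merging groups of
// 1 << SKIP_INDEX_LEVEL_SHIFT accumulators, then writes each level-0 interval
// preceded by its number of levels, with level records from highest to lowest.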
private void writeLevels(List<SkipAccumulator> accumulators) throws IOException {
final List<List<SkipAccumulator>> accumulatorsLevels = new ArrayList<>(SKIP_INDEX_MAX_LEVEL);
accumulatorsLevels.add(accumulators);
for (int i = 0; i < SKIP_INDEX_MAX_LEVEL - 1; i++) {
accumulatorsLevels.add(buildLevel(accumulatorsLevels.get(i)));
}
int totalAccumulators = accumulators.size();
for (int index = 0; index < totalAccumulators; index++) {
// compute how many levels we need to write for the current accumulator
final int levels = getLevels(index, totalAccumulators);
// write the number of levels
data.writeByte((byte) levels);
// write intervals in reverse order. This is done so we don't
// need to read all of them in case of skipping
for (int level = levels - 1; level >= 0; level--) {
final SkipAccumulator accumulator =
accumulatorsLevels.get(level).get(index >> (SKIP_INDEX_LEVEL_SHIFT * level));
data.writeInt(accumulator.maxDocID);
data.writeInt(accumulator.minDocID);
data.writeLong(accumulator.maxValue);
data.writeLong(accumulator.minValue);
data.writeInt(accumulator.docCount);
}
}
}
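// merges every complete group of 8 accumulators into one; a trailing
// incomplete group is left out, which is why getLevels also checks how many
// intervals remain to the right of an index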
private static List<SkipAccumulator> buildLevel(List<SkipAccumulator> accumulators) {
final int levelSize = 1 << SKIP_INDEX_LEVEL_SHIFT;
final List<SkipAccumulator> collector = new ArrayList<>();
for (int i = 0; i < accumulators.size() - levelSize + 1; i += levelSize) {
collector.add(SkipAccumulator.merge(accumulators, i, levelSize));
}
return collector;
}
private static int getLevels(int index, int size) {
if (Integer.numberOfTrailingZeros(index) >= SKIP_INDEX_LEVEL_SHIFT) {
// TODO: can we do it in constant time rather than linearly with SKIP_INDEX_MAX_LEVEL?
final int left = size - index;
for (int level = SKIP_INDEX_MAX_LEVEL - 1; level > 0; level--) {
final int numberIntervals = 1 << (SKIP_INDEX_LEVEL_SHIFT * level);
if (left >= numberIntervals && index % numberIntervals == 0) {
return level + 1;
}
}
}
return 1;
}
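// For example, with a full group of 512 intervals: getLevels(0, 512) == 4
// (index 0 starts a level-3 node covering all 512 intervals),
// getLevels(8, 512) == 2, getLevels(64, 512) == 3, and any index that is not
// a multiple of 8 gets a single level.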
private long[] writeValues(FieldInfo field, DocValuesProducer valuesProducer, boolean ords)
throws IOException {
SortedNumericDocValues values = valuesProducer.getSortedNumeric(field);

lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesFormat.java

@@ -194,5 +194,36 @@ public final class Lucene90DocValuesFormat extends DocValuesFormat {
static final int TERMS_DICT_REVERSE_INDEX_SIZE = 1 << TERMS_DICT_REVERSE_INDEX_SHIFT;
static final int TERMS_DICT_REVERSE_INDEX_MASK = TERMS_DICT_REVERSE_INDEX_SIZE - 1;
// number of documents in an interval
private static final int DEFAULT_SKIP_INDEX_INTERVAL_SIZE = 4096;
// bytes per interval:
// * 1 byte : number of levels
// * 16 bytes: min / max value
// * 8 bytes: min / max docID
// * 4 bytes: number of documents
private static final long SKIP_INDEX_INTERVAL_BYTES = 29L;
// number of intervals per new level, expressed as a shift: 1 << 3 == 8
// intervals.
static final int SKIP_INDEX_LEVEL_SHIFT = 3;
// max number of levels
// Increasing this number increases how much heap we need at index time:
// we currently need (1 * 8 * 8 * 8) = 512 accumulators on heap
static final int SKIP_INDEX_MAX_LEVEL = 4;
// number of bytes to skip when skipping a level. It does not take into account the
// current interval that is being read.
static final long[] SKIP_INDEX_JUMP_LENGTH_PER_LEVEL = new long[SKIP_INDEX_MAX_LEVEL];
static {
// Size of the interval minus read bytes (1 byte for level and 4 bytes for maxDocID)
SKIP_INDEX_JUMP_LENGTH_PER_LEVEL[0] = SKIP_INDEX_INTERVAL_BYTES - 5L;
for (int level = 1; level < SKIP_INDEX_MAX_LEVEL; level++) {
// jump from previous level
SKIP_INDEX_JUMP_LENGTH_PER_LEVEL[level] = SKIP_INDEX_JUMP_LENGTH_PER_LEVEL[level - 1];
// bytes of all the intervals covered by the new level
SKIP_INDEX_JUMP_LENGTH_PER_LEVEL[level] +=
(1 << (level * SKIP_INDEX_LEVEL_SHIFT)) * SKIP_INDEX_INTERVAL_BYTES;
// remove the level bytes added in the previous level
SKIP_INDEX_JUMP_LENGTH_PER_LEVEL[level] -= (1 << ((level - 1) * SKIP_INDEX_LEVEL_SHIFT));
}
}
}
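As a sanity check, the recurrence above expands to the following concrete jump lengths (a standalone sketch; the class name is ours and the constants mirror the ones defined in this file):

// Expands SKIP_INDEX_JUMP_LENGTH_PER_LEVEL with the constants above.
public class SkipIndexJumpLengths {
  public static void main(String[] args) {
    final long intervalBytes = 29L; // 1 + 16 + 8 + 4 bytes per interval
    final int levelShift = 3;
    final int maxLevel = 4;
    final long[] jump = new long[maxLevel];
    jump[0] = intervalBytes - 5L; // level byte and maxDocID already read
    for (int level = 1; level < maxLevel; level++) {
      jump[level] = jump[level - 1]
          + (1L << (level * levelShift)) * intervalBytes
          - (1L << ((level - 1) * levelShift));
    }
    System.out.println(java.util.Arrays.toString(jump)); // [24, 255, 2103, 16887]
  }
}

For instance, skipping at level 1 jumps over a full group of 8 intervals: 8 * 29 bytes plus the 28-byte level-1 record, minus the 5 bytes already consumed, gives 255.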

lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java

@@ -16,6 +16,8 @@
*/
package org.apache.lucene.codecs.lucene90;
import static org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat.SKIP_INDEX_JUMP_LENGTH_PER_LEVEL;
import static org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat.SKIP_INDEX_MAX_LEVEL;
import static org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat.TERMS_DICT_BLOCK_LZ4_SHIFT;
import java.io.IOException;
@@ -1792,28 +1794,55 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
if (input.length() > 0) {
input.prefetch(0, 1);
}
// TODO: should we write to disk the actual max level for this segment?
return new DocValuesSkipper() {
-int minDocID = -1;
-int maxDocID = -1;
-long minValue, maxValue;
-int docCount;
final int[] minDocID = new int[SKIP_INDEX_MAX_LEVEL];
final int[] maxDocID = new int[SKIP_INDEX_MAX_LEVEL];
{
for (int i = 0; i < SKIP_INDEX_MAX_LEVEL; i++) {
minDocID[i] = maxDocID[i] = -1;
}
}
final long[] minValue = new long[SKIP_INDEX_MAX_LEVEL];
final long[] maxValue = new long[SKIP_INDEX_MAX_LEVEL];
final int[] docCount = new int[SKIP_INDEX_MAX_LEVEL];
int levels = 1;
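// advance() fills entries [0, levels) of the arrays above; numLevels()
// tells callers how many levels are valid at the current position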
@Override
public void advance(int target) throws IOException {
if (target > entry.maxDocId) {
-minDocID = DocIdSetIterator.NO_MORE_DOCS;
-maxDocID = DocIdSetIterator.NO_MORE_DOCS;
// skipper is exhausted
for (int i = 0; i < SKIP_INDEX_MAX_LEVEL; i++) {
minDocID[i] = maxDocID[i] = DocIdSetIterator.NO_MORE_DOCS;
}
} else {
// find next interval
assert target > maxDocID[0] : "target must be bigger than current interval";
while (true) {
-maxDocID = input.readInt();
-if (maxDocID >= target) {
-minDocID = input.readInt();
-maxValue = input.readLong();
-minValue = input.readLong();
-docCount = input.readInt();
levels = input.readByte();
assert levels <= SKIP_INDEX_MAX_LEVEL && levels > 0
: "level out of range [" + levels + "]";
boolean valid = true;
// check if current interval is competitive or we can jump to the next position
for (int level = levels - 1; level >= 0; level--) {
if ((maxDocID[level] = input.readInt()) < target) {
input.skipBytes(SKIP_INDEX_JUMP_LENGTH_PER_LEVEL[level]); // the jump for the level
valid = false;
break;
}
minDocID[level] = input.readInt();
maxValue[level] = input.readLong();
minValue[level] = input.readLong();
docCount[level] = input.readInt();
}
if (valid) {
// adjust levels
while (levels < SKIP_INDEX_MAX_LEVEL && maxDocID[levels] >= target) {
levels++;
}
break;
-} else {
-input.skipBytes(24);
}
}
}
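// Records are stored highest level first, so a non-competitive level is
// rejected after reading only its maxDocID, and the precomputed jump skips
// that level's entire region in a single skipBytes call.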
@@ -1821,32 +1850,32 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
@Override
public int numLevels() {
-return 1;
return levels;
}
@Override
public int minDocID(int level) {
-return minDocID;
return minDocID[level];
}
@Override
public int maxDocID(int level) {
-return maxDocID;
return maxDocID[level];
}
@Override
public long minValue(int level) {
-return minValue;
return minValue[level];
}
@Override
public long maxValue(int level) {
-return maxValue;
return maxValue[level];
}
@Override
public int docCount(int level) {
-return docCount;
return docCount[level];
}
@Override

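To see how a caller can exploit the levels, here is a hedged sketch of a range filter on top of the DocValuesSkipper API implemented above (countCompetitive and the [lower, upper] bounds are hypothetical; the skipper methods are the ones overridden in this file):

import java.io.IOException;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.search.DocIdSetIterator;

public class SkipperRangeSketch {
  // Counts docs in intervals whose value range may intersect [lower, upper],
  // pruning whole multi-interval regions via the widest valid level first.
  static int countCompetitive(DocValuesSkipper skipper, long lower, long upper)
      throws IOException {
    int candidates = 0;
    int target = 0;
    while (true) {
      skipper.advance(target);
      if (skipper.minDocID(0) == DocIdSetIterator.NO_MORE_DOCS) {
        return candidates;
      }
      final int top = skipper.numLevels() - 1;
      if (skipper.maxValue(top) < lower || skipper.minValue(top) > upper) {
        target = skipper.maxDocID(top) + 1; // one jump skips up to 512 intervals
      } else {
        candidates += skipper.docCount(0); // level-0 interval may contain matches
        target = skipper.maxDocID(0) + 1;
      }
    }
  }
}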
lucene/core/src/java/org/apache/lucene/index/CheckIndex.java

@@ -3302,17 +3302,17 @@ public final class CheckIndex implements Closeable {
if (skipper.maxDocID(0) == NO_MORE_DOCS) {
break;
}
-if (skipper.minDocID(0) < doc) {
-throw new CheckIndexException(
-"skipper dv iterator for field: "
-+ fieldName
-+ " reports wrong minDocID, got "
-+ skipper.minDocID(0)
-+ " < "
-+ doc);
-}
int levels = skipper.numLevels();
for (int level = 0; level < levels; level++) {
if (skipper.minDocID(level) < doc) {
throw new CheckIndexException(
"skipper dv iterator for field: "
+ fieldName
+ " reports wrong minDocID, got "
+ skipper.minDocID(level)
+ " < "
+ doc);
}
if (skipper.minDocID(level) > skipper.maxDocID(level)) {
throw new CheckIndexException(
"skipper dv iterator for field: "

lucene/core/src/test/org/apache/lucene/codecs/lucene90/TestLucene90DocValuesFormatVariableSkipInterval.java

@@ -25,7 +25,8 @@ public class TestLucene90DocValuesFormatVariableSkipInterval extends BaseDocValuesFormatTestCase {
@Override
protected Codec getCodec() {
-return TestUtil.alwaysDocValuesFormat(new Lucene90DocValuesFormat(random().nextInt(2, 1024)));
// small interval size to test with many intervals
return TestUtil.alwaysDocValuesFormat(new Lucene90DocValuesFormat(random().nextInt(4, 16)));
}
public void testSkipIndexIntervalSize() {

lucene/test-framework/src/java/org/apache/lucene/tests/index/BaseDocValuesFormatTestCase.java

@@ -773,6 +773,13 @@ public abstract class BaseDocValuesFormatTestCase extends LegacyBaseDocValuesFormatTestCase {
maxVal <= skipper.maxValue(0));
}
docCount += skipper.docCount(0);
for (int level = 1; level < skipper.numLevels(); level++) {
assertTrue(skipper.minDocID(0) >= skipper.minDocID(level));
assertTrue(skipper.maxDocID(0) <= skipper.maxDocID(level));
assertTrue(skipper.minValue(0) >= skipper.minValue(level));
assertTrue(skipper.maxValue(0) <= skipper.maxValue(level));
assertTrue(skipper.docCount(0) < skipper.docCount(level));
}
}
assertEquals(docCount, skipper.docCount());
@@ -784,19 +791,23 @@ public abstract class BaseDocValuesFormatTestCase extends LegacyBaseDocValuesFormatTestCase {
if (skipper == null) {
return;
}
int nextLevel = 0;
while (true) {
-int doc = random().nextInt(skipper.maxDocID(0), maxDoc + 1) + 1;
int doc = random().nextInt(skipper.maxDocID(nextLevel), maxDoc + 1) + 1;
skipper.advance(doc);
if (skipper.minDocID(0) == NO_MORE_DOCS) {
assertEquals(NO_MORE_DOCS, skipper.maxDocID(0));
return;
}
if (iterator.advanceExact(doc)) {
-assertTrue(iterator.docID() >= skipper.minDocID(0));
-assertTrue(iterator.docID() <= skipper.maxDocID(0));
-assertTrue(iterator.minValue() >= skipper.minValue(0));
-assertTrue(iterator.maxValue() <= skipper.maxValue(0));
for (int level = 0; level < skipper.numLevels(); level++) {
assertTrue(iterator.docID() >= skipper.minDocID(level));
assertTrue(iterator.docID() <= skipper.maxDocID(level));
assertTrue(iterator.minValue() >= skipper.minValue(level));
assertTrue(iterator.maxValue() <= skipper.maxValue(level));
}
}
nextLevel = random().nextInt(skipper.numLevels());
}
}