From 58a7a8ada5cebeb261060c56cd6d0a9446478bf6 Mon Sep 17 00:00:00 2001 From: Toke Eskildsen Date: Mon, 3 Dec 2018 14:23:11 +0100 Subject: [PATCH] LUCENE-8374 part 1/4: Reduce reads for sparse DocValues Offset and index jump-table for IndexedDISI blocks. --- lucene/CHANGES.txt | 3 + .../lucene/codecs/lucene70/IndexedDISI.java | 65 ++++- .../codecs/lucene70/IndexedDISICache.java | 234 ++++++++++++++++++ .../lucene70/IndexedDISICacheFactory.java | 150 +++++++++++ .../lucene70/Lucene70DocValuesProducer.java | 85 +++++-- .../codecs/lucene70/TestIndexedDISI.java | 91 +++++-- .../apache/lucene/index/TestDocValues.java | 105 ++++++++ 7 files changed, 684 insertions(+), 49 deletions(-) create mode 100644 lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISICache.java create mode 100644 lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISICacheFactory.java diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 6b001b941f5..349d64d6f40 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -208,6 +208,9 @@ Optimizations to early terminate the iterator if the minimum score is greater than the constant score. (Christophe Bismuth via Jim Ferenczi) +* LUCENE-8374: Reduce reads for sparse DocValues and whole number numeric DocValues. 
+ (Toke Eskildsen) + ======================= Lucene 7.7.0 ======================= Build diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISI.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISI.java index 6138896264a..114710e0cd7 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISI.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISI.java @@ -50,6 +50,9 @@ import org.apache.lucene.util.RoaringDocIdSet; final class IndexedDISI extends DocIdSetIterator { static final int MAX_ARRAY_LENGTH = (1 << 12) - 1; + static final String NO_NAME = "n/a"; + + public final String name; private static void flush(int block, FixedBitSet buffer, int cardinality, IndexOutput out) throws IOException { assert block >= 0 && block < 65536; @@ -98,19 +101,49 @@ final class IndexedDISI extends DocIdSetIterator { /** The slice that stores the {@link DocIdSetIterator}. */ private final IndexInput slice; private final long cost; + private final IndexedDISICache cache; IndexedDISI(IndexInput in, long offset, long length, long cost) throws IOException { - this(in.slice("docs", offset, length), cost); + this(in, offset, length, cost, NO_NAME); } + IndexedDISI(IndexInput in, long offset, long length, long cost, String name) throws IOException { + this(in, offset, length, cost, null, name); + } + + IndexedDISI(IndexInput in, long offset, long length, long cost, IndexedDISICache cache) throws IOException { + this(in, offset, length, cost, cache, NO_NAME); + } + + IndexedDISI(IndexInput in, long offset, long length, long cost, IndexedDISICache cache, String name) throws IOException { + this(in.slice("docs", offset, length), cost, cache, name); + } + + IndexedDISI(IndexInput slice, long cost) throws IOException { + this(slice, cost, NO_NAME); + } // This constructor allows to pass the slice directly in case it helps reuse // see eg. 
Lucene70 norms producer's merge instance - IndexedDISI(IndexInput slice, long cost) throws IOException { + IndexedDISI(IndexInput slice, long cost, String name) throws IOException { + this(slice, cost, null, name); +// IndexedDISICacheFactory.debug( +// "Non-cached direct slice IndexedDISI with length " + slice.length() + ": " + slice.toString()); + } + + IndexedDISI(IndexInput slice, long cost, IndexedDISICache cache) throws IOException { + this(slice, cost, cache, NO_NAME); + } + // This constructor allows to pass the slice directly in case it helps reuse + // see eg. Lucene70 norms producer's merge instance + IndexedDISI(IndexInput slice, long cost, IndexedDISICache cache, String name) { + this.name = name; this.slice = slice; this.cost = cost; + this.cache = cache == null ? IndexedDISICache.EMPTY : cache; } private int block = -1; + private long blockStart; // Used with the DENSE cache private long blockEnd; private int nextBlockIndex = -1; Method method; @@ -126,6 +159,8 @@ final class IndexedDISI extends DocIdSetIterator { private int wordIndex = -1; // number of one bits encountered so far, including those of `word` private int numberOfOnes; + // Used with rank for jumps inside of DENSE + private int denseOrigoIndex; // ALL variables private int gap; @@ -138,6 +173,7 @@ final class IndexedDISI extends DocIdSetIterator { @Override public int advance(int target) throws IOException { final int targetBlock = target & 0xFFFF0000; + // Note: The cache makes it easy to add support for random access. 
This has not been done as the API forbids it if (block < targetBlock) { advanceBlock(targetBlock); } @@ -163,6 +199,20 @@ final class IndexedDISI extends DocIdSetIterator { } private void advanceBlock(int targetBlock) throws IOException { + if (targetBlock >= block+2) { // 1 block skip is (slightly) faster to do without block jump table + long offset = cache.getFilePointerForBlock(targetBlock >> IndexedDISICache.BLOCK_BITS); + if (offset != -1 && offset > slice.getFilePointer()) { + int origo = cache.getIndexForBlock(targetBlock >> IndexedDISICache.BLOCK_BITS); + if (origo != -1) { + this.nextBlockIndex = origo - 1; // -1 to compensate for the always-added 1 in readBlockHeader + slice.seek(offset); + readBlockHeader(); + return; + } + } + } + + // Fallback to non-cached do { slice.seek(blockEnd); readBlockHeader(); @@ -170,6 +220,7 @@ final class IndexedDISI extends DocIdSetIterator { } private void readBlockHeader() throws IOException { + blockStart = slice.getFilePointer(); block = Short.toUnsignedInt(slice.readShort()) << 16; assert block >= 0; final int numValues = 1 + Short.toUnsignedInt(slice.readShort()); @@ -187,6 +238,7 @@ final class IndexedDISI extends DocIdSetIterator { blockEnd = slice.getFilePointer() + (1 << 13); wordIndex = -1; numberOfOnes = index + 1; + denseOrigoIndex = numberOfOnes; } } @@ -250,6 +302,7 @@ final class IndexedDISI extends DocIdSetIterator { boolean advanceWithinBlock(IndexedDISI disi, int target) throws IOException { final int targetInBlock = target & 0xFFFF; final int targetWordIndex = targetInBlock >>> 6; + for (int i = disi.wordIndex + 1; i <= targetWordIndex; ++i) { disi.word = disi.slice.readLong(); disi.numberOfOnes += Long.bitCount(disi.word); @@ -263,7 +316,10 @@ final class IndexedDISI extends DocIdSetIterator { return true; } + // There were no set bits at the wanted position. 
Move forward until one is reached while (++disi.wordIndex < 1024) { + // This could use the rank cache to skip empty spaces >= 512 bits, but it seems unrealistic + // that such blocks would be DENSE disi.word = disi.slice.readLong(); if (disi.word != 0) { disi.index = disi.numberOfOnes; @@ -272,12 +328,15 @@ final class IndexedDISI extends DocIdSetIterator { return true; } } + // No set bits in the block at or after the wanted position. return false; } + @Override boolean advanceExactWithinBlock(IndexedDISI disi, int target) throws IOException { final int targetInBlock = target & 0xFFFF; final int targetWordIndex = targetInBlock >>> 6; + for (int i = disi.wordIndex + 1; i <= targetWordIndex; ++i) { disi.word = disi.slice.readLong(); disi.numberOfOnes += Long.bitCount(disi.word); @@ -288,6 +347,8 @@ final class IndexedDISI extends DocIdSetIterator { disi.index = disi.numberOfOnes - Long.bitCount(leftBits); return (leftBits & 1L) != 0; } + + }, ALL { @Override diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISICache.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISICache.java new file mode 100644 index 00000000000..9a8068997fb --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISICache.java @@ -0,0 +1,234 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.codecs.lucene70; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Locale; +import java.util.concurrent.atomic.AtomicInteger; + +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.Accountable; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.RamUsageEstimator; +import org.apache.lucene.util.packed.PackedInts; + +import static org.apache.lucene.codecs.lucene70.IndexedDISI.MAX_ARRAY_LENGTH; + +/** + * Caching of IndexedDISI with two strategies: + * + * A lookup table for block blockCache and index. + * + * The lookup table is an array of {@code long}s with an entry for each block. It allows for + * direct jumping to the block, as opposed to iteration from the current position and forward + * one block at a time. + * + * Each long entry consists of 2 logical parts: + * + * The first 31 bits holds the index (number of set bits in the blocks) up to just before the + * wanted block. The next 33 bits holds the offset into the underlying slice. + * As there is a maximum of 2^16 blocks, it follows that the maximum size of any block must + * not exceed 2^17 bits to avoid overflow. This is currently the case, with the largest + * block being DENSE and using 2^16 + 32 bits, and is likely to continue to hold as using + * more than double the amount of bits is unlikely to be an efficient representation. + * The cache overhead is numDocs/1024 bytes. 
+ * + * Note: There are 4 types of blocks: ALL, DENSE, SPARSE and non-existing (0 set bits). + * In the case of non-existing blocks, the entry in the lookup table has index equal to the + * previous entry and offset equal to the next non-empty block. + * + * The performance overhead for creating a cache instance is equivalent to visiting every 65536th + * doc value for the given field, i.e. it scales lineary to field size. + */ +public class IndexedDISICache implements Accountable { + private static final int BLOCK = 65536; // The number of docIDs that a single block represents + static final int BLOCK_BITS = 16; + private static final long BLOCK_INDEX_SHIFT = 33; // Number of bits to shift a lookup entry to get the index + private static final long BLOCK_INDEX_MASK = ~0L << BLOCK_INDEX_SHIFT; // The index bits in a lookup entry + private static final long BLOCK_LOOKUP_MASK = ~BLOCK_INDEX_MASK; // The offset bits in a lookup entry + + private long[] blockCache = null; // One every 65536 docs, contains index & slice position + private String creationStats = ""; + private final String name; // Identifier for debug, log & inspection + + // Flags for not-yet-defined-values used during building + private static final long BLOCK_EMPTY_INDEX = ~0L << BLOCK_INDEX_SHIFT; + private static final long BLOCK_EMPTY_LOOKUP = BLOCK_LOOKUP_MASK; + private static final long BLOCK_EMPTY = BLOCK_EMPTY_INDEX | BLOCK_EMPTY_LOOKUP; + + /** + * Builds the stated caches for the given IndexInput. + * + * @param in positioned at the start of the logical underlying bitmap. + */ + IndexedDISICache(IndexInput in, String name) throws IOException { + blockCache = new long[16]; // Will be extended when needed + Arrays.fill(blockCache, BLOCK_EMPTY); + this.name = name; + updateCaches(in); + } + + private IndexedDISICache() { + this.blockCache = null; + this.name = ""; + } + + // Used to represent no caching. 
+ public static final IndexedDISICache EMPTY = new IndexedDISICache(); + + /** + * If available, returns a position within the underlying {@link IndexInput} for the start of the block + * containing the wanted bit (the target) or the next non-EMPTY block, if the block representing the bit is empty. + * @param targetBlock the index for the block to resolve (docID / 65536). + * @return the offset for the block for target or -1 if it cannot be resolved. + */ + long getFilePointerForBlock(int targetBlock) { + long offset = blockCache == null || blockCache.length <= targetBlock ? + -1 : blockCache[targetBlock] & BLOCK_LOOKUP_MASK; + return offset == BLOCK_EMPTY_LOOKUP ? -1 : offset; + } + + /** + * If available, returns the index; number of set bits before the wanted block. + * @param targetBlock the block to resolve (docID / 65536). + * @return the index for the block or -1 if it cannot be resolved. + */ + int getIndexForBlock(int targetBlock) { + if (blockCache == null || blockCache.length <= targetBlock) { + return -1; + } + return (blockCache[targetBlock] & BLOCK_INDEX_MASK) == BLOCK_EMPTY_INDEX ? 
+ -1 : (int)(blockCache[targetBlock] >>> BLOCK_INDEX_SHIFT); + } + + public boolean hasOffsets() { + return blockCache != null; + } + + private void updateCaches(IndexInput slice) throws IOException { + final long startOffset = slice.getFilePointer(); + + final long startTime = System.nanoTime(); + AtomicInteger statBlockALL = new AtomicInteger(0); + AtomicInteger statBlockDENSE = new AtomicInteger(0); + AtomicInteger statBlockSPARSE = new AtomicInteger(0); + + // Fill phase + int largestBlock = fillCache(slice, statBlockALL, statBlockDENSE, statBlockSPARSE); + freezeCaches(largestBlock); + + slice.seek(startOffset); // Leave it as we found it + creationStats = String.format(Locale.ENGLISH, + "name=%s, blocks=%d (ALL=%d, DENSE=%d, SPARSE=%d, EMPTY=%d), time=%dms, block=%d bytes", + name, + largestBlock+1, statBlockALL.get(), statBlockDENSE.get(), statBlockSPARSE.get(), + (largestBlock+1-statBlockALL.get()-statBlockDENSE.get()-statBlockSPARSE.get()), + (System.nanoTime()-startTime)/1000000, + blockCache == null ? 
0 : blockCache.length*Long.BYTES); + } + + private int fillCache( + IndexInput slice, AtomicInteger statBlockALL, AtomicInteger statBlockDENSE, AtomicInteger statBlockSPARSE) + throws IOException { + int largestBlock = -1; + long index = 0; + int rankIndex = -1; + while (slice.getFilePointer() < slice.length()) { + final long startFilePointer = slice.getFilePointer(); + + final int blockIndex = Short.toUnsignedInt(slice.readShort()); + final int numValues = 1 + Short.toUnsignedInt(slice.readShort()); + + assert blockIndex > largestBlock; + if (blockIndex == DocIdSetIterator.NO_MORE_DOCS >>> 16) { // End reached + assert Short.toUnsignedInt(slice.readShort()) == (DocIdSetIterator.NO_MORE_DOCS & 0xFFFF); + break; + } + largestBlock = blockIndex; + + blockCache = ArrayUtil.grow(blockCache, blockIndex+1); // No-op if large enough + blockCache[blockIndex] = (index << BLOCK_INDEX_SHIFT) | startFilePointer; + index += numValues; + + if (numValues <= MAX_ARRAY_LENGTH) { // SPARSE + statBlockSPARSE.incrementAndGet(); + slice.seek(slice.getFilePointer() + (numValues << 1)); + continue; + } + if (numValues == 65536) { // ALL + statBlockALL.incrementAndGet(); + // Already at next block offset + continue; + } + + // The block is DENSE + statBlockDENSE.incrementAndGet(); + long nextBlockOffset = slice.getFilePointer() + (1 << 13); + slice.seek(nextBlockOffset); + } + + return largestBlock; + } + + private void freezeCaches(int largestBlock) { + if (largestBlock == -1) { // No set bit: Disable the caches + blockCache = null; + return; + } + + // Reduce size to minimum + if (blockCache.length-1 > largestBlock) { + long[] newBC = new long[Math.max(largestBlock - 1, 1)]; + System.arraycopy(blockCache, 0, newBC, 0, newBC.length); + blockCache = newBC; + } + + // Set non-defined blockCache entries (caused by blocks with 0 set bits) to the subsequently defined one + long latest = BLOCK_EMPTY; + for (int i = blockCache.length-1; i >= 0 ; i--) { + long current = blockCache[i]; + if 
(current == BLOCK_EMPTY) { + blockCache[i] = latest; + } else { + latest = current; + } + } + } + + /** + * @return Human readable details from the creation of the cache instance. + */ + public String getCreationStats() { + return creationStats; + } + + /** + * @return Human-readable name for the cache instance. + */ + public String getName() { + return name; + } + + @Override + public long ramBytesUsed() { + return (blockCache == null ? 0 : RamUsageEstimator.sizeOf(blockCache)) + + RamUsageEstimator.NUM_BYTES_OBJECT_REF*3 + + RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + creationStats.length()*2; + } +} \ No newline at end of file diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISICacheFactory.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISICacheFactory.java new file mode 100644 index 00000000000..6cf8af1d4b7 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISICacheFactory.java @@ -0,0 +1,150 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.lucene.codecs.lucene70; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Locale; +import java.util.Map; + +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.RandomAccessInput; +import org.apache.lucene.util.Accountable; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.RamUsageEstimator; + +/** + * Creates and stores caches for {@link IndexedDISI} and {@link Lucene70DocValuesProducer}. + * The caches are stored in maps, where the key is made up from offset and length of a slice + * in an underlying segment. Each segment uses their own IndexedDISICacheFactory. + * + * See {@link IndexedDISICache} for details on the caching. + */ +public class IndexedDISICacheFactory implements Accountable { + + /** + * If the slice with the DISI-data is less than this number of bytes, don't create a cache. + * This is a very low number as the DISI-structure very efficiently represents EMPTY and ALL blocks. + */ + private static int MIN_LENGTH_FOR_CACHING = 50; // Set this very low: Could be 9 EMPTY followed by a SPARSE + + // jump-table and rank for DISI blocks + private final Map disiPool = new HashMap<>(); + + /** + * Create a cached {@link IndexedDISI} instance. + * @param data persistent data containing the DISI-structure. + * @param cost cost as defined for IndexedDISI. + * @param name identifier for the DISI-structure for debug purposes. + * @return a cached IndexedDISI or a plain IndexedDISI, if caching is not applicable. + * @throws IOException if the DISI-structure could not be accessed. + */ + IndexedDISI createCachedIndexedDISI(IndexInput data, long key, int cost, String name) throws IOException { + IndexedDISICache cache = getCache(data, key, name); + return new IndexedDISI(data, cost, cache, name); + } + + /** + * Create a cached {@link IndexedDISI} instance. + * @param data persistent data containing the DISI-structure. 
+ * @param offset same as the offset that will also be used for creating an {@link IndexedDISI}. + * @param length same af the length that will also be used for creating an {@link IndexedDISI}. + * @param cost cost as defined for IndexedDISI. + * @param name identifier for the DISI-structure for debug purposes. + * @return a cached IndexedDISI or a plain IndexedDISI, if caching is not applicable. + * @throws IOException if the DISI-structure could not be accessed. + */ + IndexedDISI createCachedIndexedDISI(IndexInput data, long offset, long length, long cost, String name) + throws IOException { + IndexedDISICache cache = getCache(data, offset, length, name); + return new IndexedDISI(data, offset, length, cost, cache, name); + } + + /** + * Creates a cache (jump table) for {@link IndexedDISI}. + * If the cache has previously been created, the old cache is returned. + * @param data the slice to create a cache for. + * @param offset same as the offset that will also be used for creating an {@link IndexedDISI}. + * @param length same af the length that will also be used for creating an {@link IndexedDISI}. + * @param name human readable designation, typically a field name. Used for debug, log and inspection. + * @return a cache for the given slice+offset+length or null if not suitable for caching. + */ + public IndexedDISICache getCache(IndexInput data, long offset, long length, String name) throws IOException { + if (length < MIN_LENGTH_FOR_CACHING) { + return null; + } + + long key = offset + length; + IndexedDISICache cache = disiPool.get(key); + if (cache == null) { + // TODO: Avoid overlapping builds of the same cache for performance reason + cache = new IndexedDISICache(data.slice("docs", offset, length), name); + disiPool.put(key, cache); + } + return cache; + } + + /** + * Creates a cache (jump table) for {@link IndexedDISI}. + * If the cache has previously been created, the old cache is returned. + * @param slice the input slice. 
+ * @param key identifier for the cache, unique within the segment that originated the slice. + * Recommendation is offset+length for the slice, relative to the data mapping the segment. + * Warning: Do not use slice.getFilePointer and slice.length as they are not guaranteed + * to be unique within the segment (slice.getFilePointer is 0 when a sub-slice is created). + * @param name human readable designation, typically a field name. Used for debug, log and inspection. + * @return a cache for the given slice+offset+length or null if not suitable for caching. + */ + public IndexedDISICache getCache(IndexInput slice, long key, String name) throws IOException { + final long length = slice.length(); + if (length < MIN_LENGTH_FOR_CACHING) { + return null; + } + + IndexedDISICache cache = disiPool.get(key); + if (cache == null) { + // TODO: Avoid overlapping builds of the same cache + cache = new IndexedDISICache(slice, name); + disiPool.put(key, cache); + } + return cache; + } + + // Statistics + public long getDISIBlocksWithOffsetsCount() { + return disiPool.values().stream().filter(IndexedDISICache::hasOffsets).count(); + } + + @Override + public long ramBytesUsed() { + long mem = RamUsageEstimator.shallowSizeOf(this) + + RamUsageEstimator.shallowSizeOf(disiPool); + for (Map.Entry cacheEntry: disiPool.entrySet()) { + mem += RamUsageEstimator.shallowSizeOf(cacheEntry); + mem += RamUsageEstimator.sizeOf(cacheEntry.getKey()); + mem += cacheEntry.getValue().ramBytesUsed(); + } + return mem; + } + + /** + * Releases all caches. 
+ */ + void releaseAll() { + disiPool.clear(); + } +} diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesProducer.java index b0f6e840d06..812caba9d37 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesProducer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesProducer.java @@ -57,6 +57,7 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close private final Map sortedNumerics = new HashMap<>(); private long ramBytesUsed; private final IndexInput data; + private final IndexedDISICacheFactory disiCacheFactory = new IndexedDISICacheFactory(); private final int maxDoc; /** expert: instantiates a new reader */ @@ -119,23 +120,23 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close } byte type = meta.readByte(); if (type == Lucene70DocValuesFormat.NUMERIC) { - numerics.put(info.name, readNumeric(meta)); + numerics.put(info.name, readNumeric(meta, info.name)); } else if (type == Lucene70DocValuesFormat.BINARY) { - binaries.put(info.name, readBinary(meta)); + binaries.put(info.name, readBinary(meta, info.name)); } else if (type == Lucene70DocValuesFormat.SORTED) { - sorted.put(info.name, readSorted(meta)); + sorted.put(info.name, readSorted(meta, info.name)); } else if (type == Lucene70DocValuesFormat.SORTED_SET) { - sortedSets.put(info.name, readSortedSet(meta)); + sortedSets.put(info.name, readSortedSet(meta, info.name)); } else if (type == Lucene70DocValuesFormat.SORTED_NUMERIC) { - sortedNumerics.put(info.name, readSortedNumeric(meta)); + sortedNumerics.put(info.name, readSortedNumeric(meta, info.name)); } else { throw new CorruptIndexException("invalid type: " + type, meta); } } } - private NumericEntry readNumeric(ChecksumIndexInput meta) throws IOException { - NumericEntry entry = new NumericEntry(); + private NumericEntry 
readNumeric(ChecksumIndexInput meta, String name) throws IOException { + NumericEntry entry = new NumericEntry(name); readNumeric(meta, entry); return entry; } @@ -167,8 +168,8 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close entry.valuesLength = meta.readLong(); } - private BinaryEntry readBinary(ChecksumIndexInput meta) throws IOException { - BinaryEntry entry = new BinaryEntry(); + private BinaryEntry readBinary(ChecksumIndexInput meta, String name) throws IOException { + BinaryEntry entry = new BinaryEntry(name); entry.dataOffset = meta.readLong(); entry.dataLength = meta.readLong(); entry.docsWithFieldOffset = meta.readLong(); @@ -186,8 +187,8 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close return entry; } - private SortedEntry readSorted(ChecksumIndexInput meta) throws IOException { - SortedEntry entry = new SortedEntry(); + private SortedEntry readSorted(ChecksumIndexInput meta, String name) throws IOException { + SortedEntry entry = new SortedEntry(name); entry.docsWithFieldOffset = meta.readLong(); entry.docsWithFieldLength = meta.readLong(); entry.numDocsWithField = meta.readInt(); @@ -198,12 +199,12 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close return entry; } - private SortedSetEntry readSortedSet(ChecksumIndexInput meta) throws IOException { - SortedSetEntry entry = new SortedSetEntry(); + private SortedSetEntry readSortedSet(ChecksumIndexInput meta, String name) throws IOException { + SortedSetEntry entry = new SortedSetEntry(name); byte multiValued = meta.readByte(); switch (multiValued) { case 0: // singlevalued - entry.singleValueEntry = readSorted(meta); + entry.singleValueEntry = readSorted(meta, name); return entry; case 1: // multivalued break; @@ -245,8 +246,8 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close entry.termsIndexAddressesLength = meta.readLong(); } - private SortedNumericEntry 
readSortedNumeric(ChecksumIndexInput meta) throws IOException { - SortedNumericEntry entry = new SortedNumericEntry(); + private SortedNumericEntry readSortedNumeric(ChecksumIndexInput meta, String name) throws IOException { + SortedNumericEntry entry = new SortedNumericEntry(name); readNumeric(meta, entry); entry.numDocsWithField = meta.readInt(); if (entry.numDocsWithField != entry.numValues) { @@ -262,9 +263,23 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close @Override public void close() throws IOException { data.close(); + disiCacheFactory.releaseAll(); } - private static class NumericEntry { + // Highly debatable if this is a sane construct as the name is only used for debug/logging/inspection purposes + // This was introduced in LUCENE-8374 + private static class EntryImpl { + final String name; + + public EntryImpl(String name) { + this.name = name; + } + } + + private static class NumericEntry extends EntryImpl { + public NumericEntry(String name) { + super(name); + } long[] table; int blockShift; byte bitsPerValue; @@ -277,7 +292,10 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close long valuesLength; } - private static class BinaryEntry { + private static class BinaryEntry extends EntryImpl { + public BinaryEntry(String name) { + super(name); + } long dataOffset; long dataLength; long docsWithFieldOffset; @@ -290,7 +308,10 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close DirectMonotonicReader.Meta addressesMeta; } - private static class TermsDictEntry { + private static class TermsDictEntry extends EntryImpl { + public TermsDictEntry(String name) { + super(name); + } long termsDictSize; int termsDictBlockShift; DirectMonotonicReader.Meta termsAddressesMeta; @@ -308,6 +329,9 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close } private static class SortedEntry extends TermsDictEntry { + public SortedEntry(String name) { + 
super(name); + } long docsWithFieldOffset; long docsWithFieldLength; int numDocsWithField; @@ -317,6 +341,9 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close } private static class SortedSetEntry extends TermsDictEntry { + public SortedSetEntry(String name) { + super(name); + } SortedEntry singleValueEntry; long docsWithFieldOffset; long docsWithFieldLength; @@ -330,6 +357,9 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close } private static class SortedNumericEntry extends NumericEntry { + public SortedNumericEntry(String name) { + super(name); + } int numDocsWithField; DirectMonotonicReader.Meta addressesMeta; long addressesOffset; @@ -338,7 +368,7 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close @Override public long ramBytesUsed() { - return ramBytesUsed; + return ramBytesUsed + disiCacheFactory.ramBytesUsed(); } @Override @@ -496,7 +526,8 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close } } else { // sparse - final IndexedDISI disi = new IndexedDISI(data, entry.docsWithFieldOffset, entry.docsWithFieldLength, entry.numValues); + final IndexedDISI disi = disiCacheFactory.createCachedIndexedDISI( + data, entry.docsWithFieldOffset, entry.docsWithFieldLength, entry.numValues, entry.name); if (entry.bitsPerValue == 0) { return new SparseNumericDocValues(disi) { @Override @@ -767,7 +798,8 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close } } else { // sparse - final IndexedDISI disi = new IndexedDISI(data, entry.docsWithFieldOffset, entry.docsWithFieldLength, entry.numDocsWithField); + final IndexedDISI disi = disiCacheFactory.createCachedIndexedDISI( + data, entry.docsWithFieldOffset, entry.docsWithFieldLength, entry.numDocsWithField, entry.name); if (entry.minLength == entry.maxLength) { // fixed length final int length = entry.maxLength; @@ -868,7 +900,8 @@ final class Lucene70DocValuesProducer 
extends DocValuesProducer implements Close }; } else { // sparse - final IndexedDISI disi = new IndexedDISI(data, entry.docsWithFieldOffset, entry.docsWithFieldLength, entry.numDocsWithField); + final IndexedDISI disi = disiCacheFactory.createCachedIndexedDISI( + data, entry.docsWithFieldOffset, entry.docsWithFieldLength, entry.numDocsWithField, entry.name); return new BaseSortedDocValues(entry, data) { @Override @@ -1236,7 +1269,8 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close }; } else { // sparse - final IndexedDISI disi = new IndexedDISI(data, entry.docsWithFieldOffset, entry.docsWithFieldLength, entry.numDocsWithField); + final IndexedDISI disi = disiCacheFactory.createCachedIndexedDISI( + data, entry.docsWithFieldOffset, entry.docsWithFieldLength, entry.numDocsWithField, entry.name); return new SortedNumericDocValues() { boolean set; @@ -1362,7 +1396,8 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close }; } else { // sparse - final IndexedDISI disi = new IndexedDISI(data, entry.docsWithFieldOffset, entry.docsWithFieldLength, entry.numDocsWithField); + final IndexedDISI disi = disiCacheFactory.createCachedIndexedDISI( + data, entry.docsWithFieldOffset, entry.docsWithFieldLength, entry.numDocsWithField, entry.name); return new BaseSortedSetDocValues(entry, data) { boolean set; diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene70/TestIndexedDISI.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene70/TestIndexedDISI.java index 64bfbd5fc96..aae3a7f36ca 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/lucene70/TestIndexedDISI.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene70/TestIndexedDISI.java @@ -150,6 +150,39 @@ public class TestIndexedDISI extends LuceneTestCase { } } } + public void testDenseMultiBlock() throws IOException { + try (Directory dir = newDirectory()) { + int maxDoc = 10 * 65536; // 10 blocks + FixedBitSet set = new 
FixedBitSet(maxDoc); + for (int i = 0; i < maxDoc; i += 2) { // Set every other to ensure dense + set.set(i); + } + doTest(set, dir); + } + } + + public void testOneDocMissingFixed() throws IOException { + int maxDoc = 9699; + FixedBitSet set = new FixedBitSet(maxDoc); + set.set(0, maxDoc); + set.clear(1345); + try (Directory dir = newDirectory()) { + + final int cardinality = set.cardinality(); + long length; + try (IndexOutput out = dir.createOutput("foo", IOContext.DEFAULT)) { + IndexedDISI.writeBitSet(new BitSetIterator(set, cardinality), out); + length = out.getFilePointer(); + } + + int step = 16000; + try (IndexInput in = dir.openInput("foo", IOContext.DEFAULT)) { + IndexedDISI disi = new IndexedDISI(in, 0L, length, cardinality); + BitSetIterator disi2 = new BitSetIterator(set, cardinality); + assertAdvanceEquality(disi, disi2, step); + } + } + } public void testRandom() throws IOException { try (Directory dir = newDirectory()) { @@ -188,32 +221,14 @@ public class TestIndexedDISI extends LuceneTestCase { try (IndexInput in = dir.openInput("foo", IOContext.DEFAULT)) { IndexedDISI disi = new IndexedDISI(in, 0L, length, cardinality); BitSetIterator disi2 = new BitSetIterator(set, cardinality); - int i = 0; - for (int doc = disi2.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = disi2.nextDoc()) { - assertEquals(doc, disi.nextDoc()); - assertEquals(i++, disi.index()); - } - assertEquals(DocIdSetIterator.NO_MORE_DOCS, disi.nextDoc()); + assertSingleStepEquality(disi, disi2); } for (int step : new int[] {1, 10, 100, 1000, 10000, 100000}) { try (IndexInput in = dir.openInput("foo", IOContext.DEFAULT)) { IndexedDISI disi = new IndexedDISI(in, 0L, length, cardinality); BitSetIterator disi2 = new BitSetIterator(set, cardinality); - int index = -1; - while (true) { - int target = disi2.docID() + step; - int doc; - do { - doc = disi2.nextDoc(); - index++; - } while (doc < target); - assertEquals(doc, disi.advance(target)); - if (doc == 
DocIdSetIterator.NO_MORE_DOCS) { - break; - } - assertEquals(index, disi.index()); - } + assertAdvanceEquality(disi, disi2, step); } } @@ -221,8 +236,18 @@ public class TestIndexedDISI extends LuceneTestCase { try (IndexInput in = dir.openInput("foo", IOContext.DEFAULT)) { IndexedDISI disi = new IndexedDISI(in, 0L, length, cardinality); BitSetIterator disi2 = new BitSetIterator(set, cardinality); + int disi2length = set.length(); + assertAdvanceExactRandomized(disi, disi2, disi2length, step); + } + } + + dir.deleteFile("foo"); + } + + private void assertAdvanceExactRandomized(IndexedDISI disi, BitSetIterator disi2, int disi2length, int step) + throws IOException { int index = -1; - for (int target = 0; target < set.length(); ) { + for (int target = 0; target < disi2length; ) { target += TestUtil.nextInt(random(), 0, step); int doc = disi2.docID(); while (doc < target) { @@ -241,9 +266,31 @@ public class TestIndexedDISI extends LuceneTestCase { } } } + + private void assertSingleStepEquality(IndexedDISI disi, BitSetIterator disi2) throws IOException { + int i = 0; + for (int doc = disi2.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = disi2.nextDoc()) { + assertEquals(doc, disi.nextDoc()); + assertEquals(i++, disi.index()); + } + assertEquals(DocIdSetIterator.NO_MORE_DOCS, disi.nextDoc()); } - dir.deleteFile("foo"); + private void assertAdvanceEquality(IndexedDISI disi, BitSetIterator disi2, int step) throws IOException { + int index = -1; + while (true) { + int target = disi2.docID() + step; + int doc; + do { + doc = disi2.nextDoc(); + index++; + } while (doc < target); + assertEquals(doc, disi.advance(target)); + if (doc == DocIdSetIterator.NO_MORE_DOCS) { + break; + } + assertEquals("Expected equality using step " + step + " at docID " + doc, index, disi.index()); + } } } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDocValues.java b/lucene/core/src/test/org/apache/lucene/index/TestDocValues.java index 0214e5447b4..daebad9127e 100644 --- 
a/lucene/core/src/test/org/apache/lucene/index/TestDocValues.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestDocValues.java @@ -18,7 +18,13 @@ package org.apache.lucene.index; import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.List; +import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.BinaryDocValuesField; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; @@ -27,7 +33,9 @@ import org.apache.lucene.document.SortedDocValuesField; import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.document.StringField; +import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.store.Directory; +import org.apache.lucene.store.MMapDirectory; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.LuceneTestCase; @@ -123,6 +131,103 @@ public class TestDocValues extends LuceneTestCase { iw.close(); dir.close(); } + + /** + * Triggers varying bits per value codec representation for numeric. 
+ */ + public void testNumericFieldVaryingBPV() throws Exception { + Directory dir = newDirectory(); + IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null)); + long generatedSum = 0; + for (int bpv = 2 ; bpv < 24 ; bpv+=3) { + for (int i = 0 ; i < 66000 ; i++) { + Document doc = new Document(); + int max = 1 << (bpv - 1); + int value = random().nextInt(max) | max; + generatedSum += value; + //System.out.println("--- " + value); + doc.add(new NumericDocValuesField("foo", value)); + iw.addDocument(doc); + } + } + iw.flush(); + iw.forceMerge(1, true); + iw.commit(); + DirectoryReader dr = DirectoryReader.open(iw); + LeafReader r = getOnlyLeafReader(dr); + + // ok + NumericDocValues numDV = DocValues.getNumeric(r, "foo"); + + assertNotNull(numDV); + long sum = 0; + while (numDV.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { + sum += numDV.longValue(); + } + assertEquals("The sum of retrieved values should match the input", generatedSum, sum); + +// assertNotNull(DocValues.getSortedNumeric(r, "foo")); + + dr.close(); + iw.close(); + dir.close(); + } + + // LUCENE-8374 had a bug where a vBPV-block with BPV==0 at the very end of the numeric DocValues made it fail + public void testNumericEntryZeroesLastBlock() throws IOException { + List<Long> docValues = new ArrayList<>(2*16384); + for (int id = 0 ; id < 2*16384 ; id++) { // 2 vBPV-blocks for the dv-field + if (id < 16384) { // First vBPV-block just has semi-random values + docValues.add((long) (id % 1000)); + } else { // Second block is all zeroes, resulting in an extreme "1-byte for the whole block"-representation + docValues.add(0L); + } + } + assertRandomAccessDV("Last block BPV=0", docValues); + } + + private void assertRandomAccessDV(String designation, List<Long> docValues) throws IOException { + // Create corpus + Path zeroPath = Paths.get(System.getProperty("java.io.tmpdir"),"plain_" + random().nextInt()); + Directory zeroDir = new MMapDirectory(zeroPath); + IndexWriterConfig iwc = new IndexWriterConfig(new 
StandardAnalyzer()); + //iwc.setCodec(Codec.forName("Lucene70")); + IndexWriter iw = new IndexWriter(zeroDir, iwc); + + for (int id = 0 ; id < docValues.size() ; id++) { + Document doc = new Document(); + doc.add(new StringField("id", Integer.toString(id), Field.Store.YES)); + doc.add(new NumericDocValuesField("dv", docValues.get(id))); + iw.addDocument(doc); + } + iw.flush(); + iw.commit(); + iw.forceMerge(1, true); + iw.close(); + + DirectoryReader dr = DirectoryReader.open(zeroDir); + + for (int id = 0 ; id < docValues.size() ; id++) { + int readerIndex = dr.readerIndex(id); + // We create a new reader each time as we want to test vBPV-skipping and not sequential iteration + NumericDocValues numDV = dr.leaves().get(readerIndex).reader().getNumericDocValues("dv"); + assertTrue(designation + ": There should be a value for docID " + id, numDV.advanceExact(id)); + assertEquals(designation + ": The value for docID " + id + " should be as expected", + docValues.get(id), Long.valueOf(numDV.longValue())); + } + + // Clean up + deleteAndClose(zeroDir); + Files.delete(zeroPath); + } + + private void deleteAndClose(Directory dir) throws IOException { + String[] files = dir.listAll(); + for (String file: files) { + dir.deleteFile(file); + } + dir.close(); + } /** * field with binary docvalues