
LUCENE-10062: Switch to numeric doc values for encoding taxonomy ordinals

This commit is contained in:
Greg Miller 2021-11-19 13:11:42 -08:00 committed by GitHub
parent 6b99f03cdd
commit 0ba310782f
22 changed files with 992 additions and 189 deletions

@@ -367,6 +367,9 @@ Improvements
See release notes. https://github.com/locationtech/spatial4j/releases/tag/spatial4j-0.8
(David Smiley)
* LUCENE-10062: Switch taxonomy faceting to use numeric doc values for storing ordinals instead of binary doc values
with its own custom encoding. (Greg Miller)
Bug fixes
---------------------

@@ -450,3 +450,17 @@ structure. Use a standard BoostQuery here instead.
Rather than using `setSort()` to change sort values, you should instead create
a new Sort instance with the new values.
## Taxonomy-based faceting uses more modern encodings (LUCENE-9450, LUCENE-10062, LUCENE-10122)
The side-car taxonomy index now uses doc values for ord-to-path lookup (LUCENE-9450) and parent
lookup (LUCENE-10122) instead of stored fields and positions (respectively). Document ordinals
are now encoded with `SortedNumericDocValues` instead of using a custom (v-int) binary format.
Performance gains have been observed with these encoding changes, but to benefit from them, users
must create a new index using 9.x (it is not sufficient to reindex documents against an existing
8.x index). In order to remain backwards-compatible with 8.x indexes, the older format is retained
until a full rebuild is done.
Additionally, `OrdinalsReader` (and its sub-classes) have been marked `@Deprecated`, as custom binary
encodings are no longer supported for document ordinals from 9.x onwards (`SortedNumericDocValues` is
used out-of-the-box instead).
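For an index created with 9.x and the default configuration, per-document ordinals can be read straight from the `SortedNumericDocValues` field. A minimal sketch, assuming a 9.x index and the default index field name (the `visitOrdinals` helper is illustrative, not a Lucene API):

```java
import java.io.IOException;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.search.DocIdSetIterator;

class OrdinalVisitor {
  // Walk the taxonomy ordinals of every document in one segment:
  static void visitOrdinals(LeafReader reader) throws IOException {
    SortedNumericDocValues dv =
        reader.getSortedNumericDocValues(FacetsConfig.DEFAULT_INDEX_FIELD_NAME);
    if (dv == null) {
      return; // this segment has no facet ordinals
    }
    for (int doc = dv.nextDoc();
        doc != DocIdSetIterator.NO_MORE_DOCS;
        doc = dv.nextDoc()) {
      for (int i = 0; i < dv.docValueCount(); i++) {
        long ord = dv.nextValue(); // ... use the taxonomy ordinal ...
      }
    }
  }
}
```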

@@ -18,8 +18,15 @@
package org.apache.lucene.facet;
import java.io.IOException;
import java.util.function.BiConsumer;
import org.apache.lucene.facet.taxonomy.BackCompatSortedNumericDocValues;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
/**
* Utility class with a single method for getting a DocIdSetIterator that skips deleted docs
@@ -81,4 +88,47 @@ public final class FacetUtils {
}
};
}
/**
* Loads ordinal values as {@link SortedNumericDocValues}. If the index still uses the older
* binary format, it will wrap that with the SNDV API. Newer format indexes will just load the
* SNDV directly.
*
* <p>This is really only needed/useful to maintain back-compat with the binary format. Once
* back-compat is no longer needed, the SNDV field should just be loaded directly.
*
* @deprecated Please do not rely on this method. It is added as a temporary measure for providing
* index backwards-compatibility with Lucene 8 and earlier indexes, and will be removed in
* Lucene 10.
*/
@Deprecated
public static SortedNumericDocValues loadOrdinalValues(LeafReader reader, String fieldName)
throws IOException {
return loadOrdinalValues(reader, fieldName, null);
}
/**
* Loads ordinal values as {@link SortedNumericDocValues}. If the index still uses the older
* binary format, it will wrap that with the SNDV API. Newer format indexes will just load the
* SNDV directly. The provided {@code binaryValueDecoder} allows custom decoding logic for older
* binary format fields to be provided.
*
* <p>This is really only needed/useful to maintain back-compat with the binary format. Once
* back-compat is no longer needed, the SNDV field should just be loaded directly.
*
* @deprecated Please do not rely on this method. It is added as a temporary measure for providing
* index backwards-compatibility with Lucene 8 and earlier indexes, and will be removed in
* Lucene 10.
*/
@Deprecated
public static SortedNumericDocValues loadOrdinalValues(
LeafReader reader, String fieldName, BiConsumer<BytesRef, IntsRef> binaryValueDecoder)
throws IOException {
if (reader.getMetaData().getCreatedVersionMajor() <= 8) {
BinaryDocValues oldStyleDocValues = reader.getBinaryDocValues(fieldName);
return BackCompatSortedNumericDocValues.wrap(oldStyleDocValues, binaryValueDecoder);
} else {
return reader.getSortedNumericDocValues(fieldName);
}
}
}
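A short usage sketch for the helper above; the segment loop and default field name are assumptions for illustration, not part of this change:

```java
import java.io.IOException;
import org.apache.lucene.facet.FacetUtils;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedNumericDocValues;

class OrdinalLoadingSketch {
  // Load ordinals in a back-compat-safe way for every segment; the returned
  // iterator transparently wraps the legacy binary format for 8.x segments.
  static void visitAllSegments(IndexReader indexReader) throws IOException {
    for (LeafReaderContext ctx : indexReader.leaves()) {
      SortedNumericDocValues ords =
          FacetUtils.loadOrdinalValues(ctx.reader(), FacetsConfig.DEFAULT_INDEX_FIELD_NAME);
      if (ords == null) {
        continue; // no ordinals for this field in this segment
      }
      // iterate with nextDoc() / docValueCount() / nextValue() as usual
    }
  }
}
```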

@@ -28,6 +28,7 @@ import java.util.concurrent.ConcurrentHashMap;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField;
@@ -409,9 +410,26 @@ public class FacetsConfig {
indexDrillDownTerms(doc, indexFieldName, dimConfig, facetLabel);
}
// Facet counts:
// DocValues are considered stored fields:
doc.add(new BinaryDocValuesField(indexFieldName, dedupAndEncode(ordinals.get())));
// Store the taxonomy ordinals associated with each doc. Prefer to use SortedNumericDocValues
// but "fall back" to a custom binary format to maintain backwards compatibility with Lucene 8
// indexes.
IntsRef ords = ordinals.get();
if (taxoWriter.useNumericDocValuesForOrdinals()) {
// Dedupe and encode the ordinals. It's not important that we sort here
// (SortedNumericDocValuesField will handle this internally), but we
// sort to identify dups (since SNDVF doesn't dedupe):
Arrays.sort(ords.ints, ords.offset, ords.offset + ords.length);
int prev = -1;
for (int i = 0; i < ords.length; i++) {
int ord = ords.ints[ords.offset + i];
if (ord > prev) {
doc.add(new SortedNumericDocValuesField(indexFieldName, ord));
prev = ord;
}
}
} else {
doc.add(new BinaryDocValuesField(indexFieldName, dedupAndEncode(ords)));
}
}
}
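For context, a sketch of the indexing path that reaches the branch above; the writers and dimension names are assumptions for illustration:

```java
import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.facet.FacetField;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
import org.apache.lucene.index.IndexWriter;

class FacetIndexingSketch {
  // build() resolves labels to taxonomy ordinals; on a 9.x index they are
  // stored as SortedNumericDocValuesField values (deduped as shown above),
  // while an 8.x index falls back to the legacy binary encoding.
  static void addDoc(IndexWriter indexWriter, TaxonomyWriter taxoWriter) throws IOException {
    FacetsConfig config = new FacetsConfig();
    Document doc = new Document();
    doc.add(new FacetField("Author", "Bob"));
    doc.add(new FacetField("Author", "Lisa"));
    indexWriter.addDocument(config.build(taxoWriter, doc));
  }
}
```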
@@ -507,7 +525,13 @@ public class FacetsConfig {
}
}
/** Encodes ordinals into a BytesRef; expert: subclass can override this to change encoding. */
/**
* Encodes ordinals into a BytesRef; expert: subclass can override this to change encoding.
*
* @deprecated Starting in Lucene 9, we moved to a more straightforward numeric doc values
* encoding and no longer support custom binary encodings.
*/
@Deprecated
protected BytesRef dedupAndEncode(IntsRef ordinals) {
Arrays.sort(ordinals.ints, ordinals.offset, ordinals.length);
byte[] bytes = new byte[5 * ordinals.length];

@@ -0,0 +1,148 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.facet.taxonomy;
import java.io.IOException;
import java.util.function.BiConsumer;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
/**
* Wraps a {@link BinaryDocValues} instance, providing a {@link SortedNumericDocValues} interface
* for the purpose of being backwards-compatible. (see: LUCENE-10062)
*
* @deprecated Only here for back-compat support. Should be removed with Lucene 10.
*/
@Deprecated
public class BackCompatSortedNumericDocValues extends SortedNumericDocValues {
private final BinaryDocValues binaryDocValues;
private final BiConsumer<BytesRef, IntsRef> binaryValueDecoder;
private final IntsRef scratch = new IntsRef();
private int curr;
/**
* Wrap the provided binary encoded doc values. Decodes the binary values with the provided {@code
* binaryValueDecoder}, allowing the default decoding behavior to be overridden. If a null doc
* values instance is provided, the returned instance will also be null. If a null value decoder
* is specified, the default encoding will be assumed.
*/
public static SortedNumericDocValues wrap(
BinaryDocValues binaryDocValues, BiConsumer<BytesRef, IntsRef> binaryValueDecoder) {
if (binaryDocValues == null) {
return null;
}
return new BackCompatSortedNumericDocValues(binaryDocValues, binaryValueDecoder);
}
/** see the static {@code wrap} methods */
private BackCompatSortedNumericDocValues(
BinaryDocValues binaryDocValues, BiConsumer<BytesRef, IntsRef> binaryValueDecoder) {
assert binaryDocValues != null;
this.binaryDocValues = binaryDocValues;
if (binaryValueDecoder != null) {
this.binaryValueDecoder = binaryValueDecoder;
} else {
this.binaryValueDecoder = BackCompatSortedNumericDocValues::loadValues;
}
}
@Override
public boolean advanceExact(int target) throws IOException {
boolean result = binaryDocValues.advanceExact(target);
if (result) {
reloadValues();
}
return result;
}
@Override
public long nextValue() throws IOException {
curr++;
assert curr < scratch.length;
return scratch.ints[scratch.offset + curr];
}
@Override
public int docValueCount() {
return scratch.length;
}
@Override
public int docID() {
return binaryDocValues.docID();
}
@Override
public int nextDoc() throws IOException {
return advance(binaryDocValues.docID() + 1);
}
@Override
public int advance(int target) throws IOException {
int doc = binaryDocValues.advance(target);
if (doc != NO_MORE_DOCS) {
reloadValues();
}
return doc;
}
@Override
public long cost() {
return binaryDocValues.cost();
}
private void reloadValues() throws IOException {
curr = -1;
binaryValueDecoder.accept(binaryDocValues.binaryValue(), scratch);
}
/** Load ordinals for the currently-positioned doc, assuming the default binary encoding. */
static void loadValues(BytesRef buf, IntsRef ordinals) {
// grow the buffer up front, even if by a large number of values (buf.length)
// that saves the need to check inside the loop for every decoded value if
// the buffer needs to grow.
if (ordinals.ints.length < buf.length) {
ordinals.ints = ArrayUtil.grow(ordinals.ints, buf.length);
}
ordinals.offset = 0;
ordinals.length = 0;
// it is better if the decoding is inlined like so, and not e.g.
// in a utility method
int upto = buf.offset + buf.length;
int value = 0;
int offset = buf.offset;
int prev = 0;
while (offset < upto) {
byte b = buf.bytes[offset++];
if (b >= 0) {
ordinals.ints[ordinals.length] = ((value << 7) | b) + prev;
value = 0;
prev = ordinals.ints[ordinals.length];
ordinals.length++;
} else {
value = (value << 7) | (b & 0x7F);
}
}
}
}
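For reference, a minimal sketch of the legacy encoding that `loadValues` reverses; it is not the exact `FacetsConfig#dedupAndEncode` implementation, and the helper names are illustrative. Ordinals are sorted, delta-encoded against the previous ordinal, and each delta is written MSB-first as a variable-length int whose continuation bytes set the high bit. For example, ordinals {5, 25, 230} become deltas {5, 20, 205} and bytes {0x05, 0x14, 0x81, 0x4D}.

```java
import java.io.ByteArrayOutputStream;

class LegacyOrdinalEncodingSketch {
  // Encode sorted, deduped ordinals as vint deltas, matching loadValues() above.
  static byte[] encode(int[] sortedDedupedOrds) {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    int prev = 0;
    for (int ord : sortedDedupedOrds) {
      writeVInt(out, ord - prev);
      prev = ord;
    }
    return out.toByteArray();
  }

  private static void writeVInt(ByteArrayOutputStream out, int v) {
    if ((v >>> 7) != 0) {
      writeHighGroups(out, v >>> 7);
    }
    out.write(v & 0x7F); // terminal byte: high bit clear ends the value
  }

  private static void writeHighGroups(ByteArrayOutputStream out, int v) {
    if ((v >>> 7) != 0) {
      writeHighGroups(out, v >>> 7);
    }
    out.write((v & 0x7F) | 0x80); // continuation byte: high bit set
  }
}
```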

@@ -45,7 +45,11 @@ import org.apache.lucene.util.RamUsageEstimator;
*
* <p><b>NOTE:</b> create one instance of this and re-use it for all facet implementations (the
* cache is per-instance, not static).
*
* @deprecated Custom binary encodings for taxonomy ordinals are no longer supported starting with
* Lucene 9
*/
@Deprecated
public class CachedOrdinalsReader extends OrdinalsReader implements Accountable {
private final OrdinalsReader source;

@@ -17,15 +17,22 @@
package org.apache.lucene.facet.taxonomy;
import java.io.IOException;
import org.apache.lucene.facet.FacetUtils;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
/** Decodes ordinals previously indexed into a BinaryDocValues field */
/**
* Decodes ordinals previously indexed into a BinaryDocValues field
*
* @deprecated Custom binary encodings for taxonomy ordinals are no longer supported starting with
* Lucene 9
*/
@Deprecated
public class DocValuesOrdinalsReader extends OrdinalsReader {
private final String field;
@@ -41,12 +48,12 @@ public class DocValuesOrdinalsReader extends OrdinalsReader {
@Override
public OrdinalsSegmentReader getReader(LeafReaderContext context) throws IOException {
BinaryDocValues values0 = context.reader().getBinaryDocValues(field);
if (values0 == null) {
values0 = DocValues.emptyBinary();
SortedNumericDocValues dv0 =
FacetUtils.loadOrdinalValues(context.reader(), field, this::decode);
if (dv0 == null) {
dv0 = DocValues.emptySortedNumeric();
}
final BinaryDocValues values = values0;
final SortedNumericDocValues dv = dv0;
return new OrdinalsSegmentReader() {
@@ -59,16 +66,21 @@ public class DocValuesOrdinalsReader extends OrdinalsReader {
"docs out of order: lastDocID=" + lastDocID + " vs docID=" + docID);
}
lastDocID = docID;
if (docID > values.docID()) {
values.advance(docID);
ordinals.offset = 0;
ordinals.length = 0;
if (dv.advanceExact(docID)) {
int count = dv.docValueCount();
if (ordinals.ints.length < count) {
ordinals.ints = ArrayUtil.grow(ordinals.ints, count);
}
for (int i = 0; i < count; i++) {
ordinals.ints[ordinals.length] = (int) dv.nextValue();
ordinals.length++;
}
}
final BytesRef bytes;
if (values.docID() == docID) {
bytes = values.binaryValue();
} else {
bytes = new BytesRef(BytesRef.EMPTY_BYTES);
}
decode(bytes, ordinals);
}
};
}
@@ -91,33 +103,6 @@ public class DocValuesOrdinalsReader extends OrdinalsReader {
* @param ordinals buffer for decoded ordinals
*/
public void decode(BytesRef buf, IntsRef ordinals) {
// grow the buffer up front, even if by a large number of values (buf.length)
// that saves the need to check inside the loop for every decoded value if
// the buffer needs to grow.
if (ordinals.ints.length < buf.length) {
ordinals.ints = ArrayUtil.grow(ordinals.ints, buf.length);
}
ordinals.offset = 0;
ordinals.length = 0;
// it is better if the decoding is inlined like so, and not e.g.
// in a utility method
int upto = buf.offset + buf.length;
int value = 0;
int offset = buf.offset;
int prev = 0;
while (offset < upto) {
byte b = buf.bytes[offset++];
if (b >= 0) {
ordinals.ints[ordinals.length] = ((value << 7) | b) + prev;
value = 0;
prev = ordinals.ints[ordinals.length];
ordinals.length++;
} else {
value = (value << 7) | (b & 0x7F);
}
}
BackCompatSortedNumericDocValues.loadValues(buf, ordinals);
}
}

@@ -19,17 +19,17 @@ package org.apache.lucene.facet.taxonomy;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import org.apache.lucene.facet.FacetUtils;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.FacetsCollector.MatchingDocs;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.search.ConjunctionUtils;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
/**
* Computes facet counts, assuming the default encoding into DocValues was used.
@@ -70,8 +70,9 @@ public class FastTaxonomyFacetCounts extends IntTaxonomyFacets {
private final void count(List<MatchingDocs> matchingDocs) throws IOException {
for (MatchingDocs hits : matchingDocs) {
BinaryDocValues dv = hits.context.reader().getBinaryDocValues(indexFieldName);
if (dv == null) { // this reader does not have DocValues for the requested category list
SortedNumericDocValues dv =
FacetUtils.loadOrdinalValues(hits.context.reader(), indexFieldName);
if (dv == null) {
continue;
}
@@ -79,21 +80,8 @@ public class FastTaxonomyFacetCounts extends IntTaxonomyFacets {
ConjunctionUtils.intersectIterators(Arrays.asList(hits.bits.iterator(), dv));
for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
final BytesRef bytesRef = dv.binaryValue();
byte[] bytes = bytesRef.bytes;
int end = bytesRef.offset + bytesRef.length;
int ord = 0;
int offset = bytesRef.offset;
int prev = 0;
while (offset < end) {
byte b = bytes[offset++];
if (b >= 0) {
prev = ord = ((ord << 7) | b) + prev;
increment(ord);
ord = 0;
} else {
ord = (ord << 7) | (b & 0x7F);
}
for (int i = 0; i < dv.docValueCount(); i++) {
increment((int) dv.nextValue());
}
}
}
@@ -103,8 +91,8 @@ public class FastTaxonomyFacetCounts extends IntTaxonomyFacets {
private final void countAll(IndexReader reader) throws IOException {
for (LeafReaderContext context : reader.leaves()) {
BinaryDocValues dv = context.reader().getBinaryDocValues(indexFieldName);
if (dv == null) { // this reader does not have DocValues for the requested category list
SortedNumericDocValues dv = FacetUtils.loadOrdinalValues(context.reader(), indexFieldName);
if (dv == null) {
continue;
}
@@ -114,21 +102,9 @@ public class FastTaxonomyFacetCounts extends IntTaxonomyFacets {
if (liveDocs != null && liveDocs.get(doc) == false) {
continue;
}
final BytesRef bytesRef = dv.binaryValue();
byte[] bytes = bytesRef.bytes;
int end = bytesRef.offset + bytesRef.length;
int ord = 0;
int offset = bytesRef.offset;
int prev = 0;
while (offset < end) {
byte b = bytes[offset++];
if (b >= 0) {
prev = ord = ((ord << 7) | b) + prev;
increment(ord);
ord = 0;
} else {
ord = (ord << 7) | (b & 0x7F);
}
for (int i = 0; i < dv.docValueCount(); i++) {
increment((int) dv.nextValue());
}
}
}
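Both counting paths above are reached through the standard faceting flow. A usage sketch, with the searcher and taxonomy reader assumed to be set up by the caller:

```java
import java.io.IOException;
import org.apache.lucene.facet.FacetResult;
import org.apache.lucene.facet.Facets;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.taxonomy.FastTaxonomyFacetCounts;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;

class CountingSketch {
  static FacetResult countAll(IndexSearcher searcher, TaxonomyReader taxoReader)
      throws IOException {
    FacetsConfig config = new FacetsConfig();
    FacetsCollector fc = new FacetsCollector();
    FacetsCollector.search(searcher, new MatchAllDocsQuery(), 10, fc);
    Facets facets = new FastTaxonomyFacetCounts(taxoReader, config, fc);
    return facets.getTopChildren(10, "Author"); // dimension name is illustrative
  }
}
```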

@@ -16,7 +16,9 @@
*/
package org.apache.lucene.facet.taxonomy;
import com.carrotsearch.hppc.IntArrayList;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.facet.FacetsConfig;
@@ -26,7 +28,10 @@ import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.Ordina
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.FilterBinaryDocValues;
import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.FilterSortedNumericDocValues;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
@@ -107,6 +112,66 @@ public class OrdinalMappingLeafReader extends FilterLeafReader {
}
}
private class OrdinalMappingSortedNumericDocValues extends FilterSortedNumericDocValues {
private final IntArrayList currentValues;
private int currIndex;
OrdinalMappingSortedNumericDocValues(SortedNumericDocValues in) {
super(in);
currentValues = new IntArrayList(32);
}
@Override
public boolean advanceExact(int target) throws IOException {
boolean result = in.advanceExact(target);
if (result) {
reloadValues();
}
return result;
}
@Override
public int advance(int target) throws IOException {
int result = in.advance(target);
if (result != DocIdSetIterator.NO_MORE_DOCS) {
reloadValues();
}
return result;
}
@Override
public int nextDoc() throws IOException {
int result = in.nextDoc();
if (result != DocIdSetIterator.NO_MORE_DOCS) {
reloadValues();
}
return result;
}
@Override
public int docValueCount() {
return currentValues.elementsCount;
}
private void reloadValues() throws IOException {
currIndex = 0;
currentValues.clear();
for (int i = 0; i < in.docValueCount(); i++) {
int originalOrd = Math.toIntExact(in.nextValue());
currentValues.add(ordinalMap[originalOrd]);
}
Arrays.sort(currentValues.buffer, 0, currentValues.elementsCount);
}
@Override
public long nextValue() {
assert currIndex < currentValues.size();
int actual = currentValues.get(currIndex);
currIndex++;
return actual;
}
}
private final int[] ordinalMap;
private final InnerFacetsConfig facetsConfig;
private final Set<String> facetFields;
@@ -125,31 +190,59 @@ public class OrdinalMappingLeafReader extends FilterLeafReader {
}
// always add the default indexFieldName. This is because FacetsConfig does
// not explicitly record dimensions that were indexed under the default
// DimConfig, unless they have a custome DimConfig.
// DimConfig, unless they have a custom DimConfig.
facetFields.add(FacetsConfig.DEFAULT_DIM_CONFIG.indexFieldName);
}
/**
* Expert: encodes category ordinals into a BytesRef. Override in case you use custom encoding,
* other than the default done by FacetsConfig.
*
* @deprecated Custom binary formats are no longer directly supported for taxonomy faceting
* starting in Lucene 9
*/
@Deprecated
protected BytesRef encode(IntsRef ordinals) {
return facetsConfig.dedupAndEncode(ordinals);
}
/** Expert: override in case you used custom encoding for the categories under this field. */
/**
* Expert: override in case you used custom encoding for the categories under this field.
*
* @deprecated Custom binary formats are no longer directly supported for taxonomy faceting
* starting in Lucene 9
*/
@Deprecated
protected OrdinalsReader getOrdinalsReader(String field) {
return new DocValuesOrdinalsReader(field);
}
@Override
public BinaryDocValues getBinaryDocValues(String field) throws IOException {
if (facetFields.contains(field)) {
BinaryDocValues original = in.getBinaryDocValues(field);
if (original != null && facetFields.contains(field)) {
// The requested field is a facet ordinals field _and_ it's non-null, so move forward with
// mapping:
final OrdinalsReader ordsReader = getOrdinalsReader(field);
return new OrdinalMappingBinaryDocValues(
ordsReader.getReader(in.getContext()), in.getBinaryDocValues(field));
return new OrdinalMappingBinaryDocValues(ordsReader.getReader(in.getContext()), original);
} else {
return in.getBinaryDocValues(field);
// The requested field either isn't present (null) or isn't a facet ordinals field. Either
// way, just return the original:
return original;
}
}
@Override
public SortedNumericDocValues getSortedNumericDocValues(String field) throws IOException {
SortedNumericDocValues original = in.getSortedNumericDocValues(field);
if (original != null && facetFields.contains(field)) {
// The requested field is a facet ordinals field _and_ it's non-null, so move forward with
// mapping:
return new OrdinalMappingSortedNumericDocValues(original);
} else {
// The requested field either isn't present (null) or isn't a facet ordinals field. Either
// way, just return the original:
return original;
}
}
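The typical consumer of this wrapper is index merging, where source-taxonomy ordinals are remapped onto a destination taxonomy. A sketch under stated assumptions (`ordinalMap`, `srcConfig`, the readers and writers all come from the caller; see `TaxonomyMergeUtils` for the full recipe):

```java
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.index.CodecReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SlowCodecReaderWrapper;

class OrdinalRemappingSketch {
  // Remap source-taxonomy ordinals onto the destination taxonomy while
  // merging; ordinalMap maps each source ordinal to its destination ordinal.
  static void merge(
      DirectoryReader sourceReader,
      int[] ordinalMap,
      FacetsConfig srcConfig,
      IndexWriter destWriter)
      throws IOException {
    List<CodecReader> wrapped = new ArrayList<>();
    for (LeafReaderContext ctx : sourceReader.leaves()) {
      wrapped.add(
          SlowCodecReaderWrapper.wrap(
              new OrdinalMappingLeafReader(ctx.reader(), ordinalMap, srcConfig)));
    }
    destWriter.addIndexes(wrapped.toArray(new CodecReader[0]));
  }
}
```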

@@ -20,7 +20,13 @@ import java.io.IOException;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.util.IntsRef;
/** Provides per-document ordinals. */
/**
* Provides per-document ordinals.
*
* @deprecated Custom binary encodings for taxonomy ordinals are no longer supported starting with
* Lucene 9
*/
@Deprecated
public abstract class OrdinalsReader {
/** Returns ordinals for documents in one segment. */

@@ -29,8 +29,11 @@ import org.apache.lucene.util.IntsRef;
* Reads from any {@link OrdinalsReader}; use {@link FastTaxonomyFacetCounts} if you are using the
* default encoding from {@link BinaryDocValues}.
*
* @lucene.experimental
* @deprecated Custom binary encodings for taxonomy ordinals are no longer supported starting with
* Lucene 9. Please switch to {@link FastTaxonomyFacetCounts} or implement your own {@link
* org.apache.lucene.facet.Facets} implementation if you have custom needs.
*/
@Deprecated
public class TaxonomyFacetCounts extends IntTaxonomyFacets {
private final OrdinalsReader ordinalsReader;

@@ -20,7 +20,10 @@ import static org.apache.lucene.facet.taxonomy.TaxonomyReader.INVALID_ORDINAL;
import static org.apache.lucene.facet.taxonomy.TaxonomyReader.ROOT_ORDINAL;
import java.io.IOException;
import org.apache.lucene.facet.FacetUtils;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.util.IntsRef;
/**
@@ -34,19 +37,16 @@ public class TaxonomyFacetLabels {
/** {@code TaxonomyReader} provided to the constructor */
private final TaxonomyReader taxoReader;
/**
* {@code OrdinalsReader} to decode ordinals previously indexed into the {@code BinaryDocValues}
* facet field
*/
private final OrdinalsReader ordsReader;
/** field storing the taxonomy ordinals */
private final String indexFieldName;
/**
* Sole constructor. Do not close the provided {@link TaxonomyReader} while still using this
* instance!
*/
public TaxonomyFacetLabels(TaxonomyReader taxoReader, String indexFieldName) throws IOException {
public TaxonomyFacetLabels(TaxonomyReader taxoReader, String indexFieldName) {
this.taxoReader = taxoReader;
this.ordsReader = new DocValuesOrdinalsReader(indexFieldName);
this.indexFieldName = indexFieldName;
}
/**
@@ -62,7 +62,13 @@ public class TaxonomyFacetLabels {
* @throws IOException when a low-level IO issue occurs
*/
public FacetLabelReader getFacetLabelReader(LeafReaderContext readerContext) throws IOException {
return new FacetLabelReader(ordsReader, readerContext);
SortedNumericDocValues ordinalValues =
FacetUtils.loadOrdinalValues(readerContext.reader(), indexFieldName);
if (ordinalValues == null) {
ordinalValues = DocValues.emptySortedNumeric();
}
return new FacetLabelReader(ordinalValues);
}
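A usage sketch for the numeric path, mirroring the back-compat test later in this commit; the searcher, taxonomy reader, doc id and the single-segment assumption are illustrative:

```java
import java.io.IOException;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.search.IndexSearcher;

class FacetLabelSketch {
  static void printLabels(IndexSearcher searcher, TaxonomyReader taxoReader, int docId)
      throws IOException {
    TaxonomyFacetLabels facetLabels =
        new TaxonomyFacetLabels(taxoReader, FacetsConfig.DEFAULT_INDEX_FIELD_NAME);
    // assumes a single-segment index; real code would pick the right leaf for docId
    TaxonomyFacetLabels.FacetLabelReader labelReader =
        facetLabels.getFacetLabelReader(searcher.getIndexReader().leaves().get(0));
    for (FacetLabel label = labelReader.nextFacetLabel(docId);
        label != null;
        label = labelReader.nextFacetLabel(docId)) {
      System.out.println(label); // consume the label
    }
  }
}
```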
/**
@@ -71,18 +77,50 @@ public class TaxonomyFacetLabels {
* @lucene.experimental
*/
public class FacetLabelReader {
/** By default, we store taxonomy ordinals in a SortedNumericDocValues field */
private final SortedNumericDocValues ordinalValues;
/**
* Users can provide their own custom OrdinalsReader for cases where the default encoding isn't
* used. This capability is deprecated and will be removed in Lucene 10.
*/
private final OrdinalsReader.OrdinalsSegmentReader ordinalsSegmentReader;
private final IntsRef decodedOrds = new IntsRef();
private final IntsRef decodedOrds;
private int currentDocId = -1;
private int currentPos = -1;
private boolean currentDocHasValues;
private int currentPos;
private int currentDocOrdinalCount;
// Lazily set when nextFacetLabel(int docId, String facetDimension) is first called
private int[] parents;
/** Sole constructor. */
/**
* Construct from a specified {@link SortedNumericDocValues} field; useful for reading the
* default encoding.
*/
public FacetLabelReader(SortedNumericDocValues ordinalValues) {
this.ordinalValues = ordinalValues;
ordinalsSegmentReader = null;
decodedOrds = null;
}
/**
* Construct using a custom {@link OrdinalsReader}; useful if using a custom binary format.
*
* <p>Note: If using the default encoding, you can use {@link
* #FacetLabelReader(SortedNumericDocValues)} directly
*
* @deprecated Custom binary encodings for taxonomy ordinals are no longer supported starting
* with Lucene 9
*/
@Deprecated
public FacetLabelReader(OrdinalsReader ordsReader, LeafReaderContext readerContext)
throws IOException {
ordinalsSegmentReader = ordsReader.getReader(readerContext);
decodedOrds = new IntsRef();
ordinalValues = null;
}
/**
@@ -108,20 +146,45 @@ public class TaxonomyFacetLabels {
throw new IllegalArgumentException(
"docs out of order: previous docId=" + currentDocId + " current docId=" + docId);
}
ordinalsSegmentReader.get(docId, decodedOrds);
currentDocId = docId;
currentPos = decodedOrds.offset;
if (ordinalsSegmentReader != null) {
ordinalsSegmentReader.get(docId, decodedOrds);
currentPos = decodedOrds.offset;
} else {
currentDocHasValues = ordinalValues.advanceExact(docId);
if (currentDocHasValues) {
currentDocOrdinalCount = ordinalValues.docValueCount();
currentPos = 0;
}
}
}
int endPos = decodedOrds.offset + decodedOrds.length;
assert currentPos <= endPos;
int ord;
if (ordinalsSegmentReader != null) {
int endPos = decodedOrds.offset + decodedOrds.length;
assert currentPos <= endPos;
if (currentPos == endPos) {
// no more FacetLabels
return null;
if (currentPos == endPos) {
return null;
}
ord = decodedOrds.ints[currentPos++];
} else {
if (currentDocHasValues == false) {
return null;
}
assert currentPos <= currentDocOrdinalCount;
if (currentPos == currentDocOrdinalCount) {
return null;
}
ord = (int) ordinalValues.nextValue();
currentPos++;
}
int ord = decodedOrds.ints[currentPos++];
return taxoReader.getPath(ord);
}
@@ -168,24 +231,61 @@ public class TaxonomyFacetLabels {
throw new IllegalArgumentException(
"docs out of order: previous docId=" + currentDocId + " current docId=" + docId);
}
ordinalsSegmentReader.get(docId, decodedOrds);
currentPos = decodedOrds.offset;
currentDocId = docId;
}
if (parents == null) {
parents = taxoReader.getParallelTaxonomyArrays().parents();
}
int endPos = decodedOrds.offset + decodedOrds.length;
assert currentPos <= endPos;
for (; currentPos < endPos; ) {
int ord = decodedOrds.ints[currentPos++];
if (isDescendant(ord, parentOrd) == true) {
return taxoReader.getPath(ord);
if (ordinalsSegmentReader != null) {
ordinalsSegmentReader.get(docId, decodedOrds);
currentPos = decodedOrds.offset;
} else {
currentDocHasValues = ordinalValues.advanceExact(docId);
if (currentDocHasValues) {
currentDocOrdinalCount = ordinalValues.docValueCount();
currentPos = 0;
}
}
}
if (ordinalsSegmentReader != null) {
int endPos = decodedOrds.offset + decodedOrds.length;
assert currentPos <= endPos;
if (currentPos == endPos) {
return null;
}
if (parents == null) {
parents = taxoReader.getParallelTaxonomyArrays().parents();
}
do {
int ord = decodedOrds.ints[currentPos++];
if (isDescendant(ord, parentOrd) == true) {
return taxoReader.getPath(ord);
}
} while (currentPos < endPos);
} else {
if (currentDocHasValues == false) {
return null;
}
assert currentPos <= currentDocOrdinalCount;
if (currentPos == currentDocOrdinalCount) {
return null;
}
if (parents == null) {
parents = taxoReader.getParallelTaxonomyArrays().parents();
}
do {
int ord = (int) ordinalValues.nextValue();
currentPos++;
if (isDescendant(ord, parentOrd) == true) {
return taxoReader.getPath(ord);
}
} while (currentPos < currentDocOrdinalCount);
}
return null;
}
}
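The dimension-filtered variant is consumed the same way as the sketch shown earlier after `getFacetLabelReader`; only the loop changes (the "Author" dimension name is illustrative):

```java
// Same labelReader as above, but restricted to a single dimension:
for (FacetLabel label = labelReader.nextFacetLabel(docId, "Author");
    label != null;
    label = labelReader.nextFacetLabel(docId, "Author")) {
  // only labels under the "Author" dimension are returned
}
```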

@@ -18,9 +18,12 @@ package org.apache.lucene.facet.taxonomy;
import java.io.IOException;
import java.util.List;
import org.apache.lucene.facet.FacetUtils;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.FacetsCollector.MatchingDocs;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.search.ConjunctionUtils;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.DoubleValues;
import org.apache.lucene.search.DoubleValuesSource;
@@ -36,8 +39,7 @@ public class TaxonomyFacetSumValueSource extends FloatTaxonomyFacets {
/**
* Aggregates double facet values from the provided {@link DoubleValuesSource}, pulling ordinals
* using {@link DocValuesOrdinalsReader} against the default indexed facet field {@link
* FacetsConfig#DEFAULT_INDEX_FIELD_NAME}.
* from the default indexed facet field {@link FacetsConfig#DEFAULT_INDEX_FIELD_NAME}.
*/
public TaxonomyFacetSumValueSource(
TaxonomyReader taxoReader,
@@ -45,18 +47,33 @@ public class TaxonomyFacetSumValueSource extends FloatTaxonomyFacets {
FacetsCollector fc,
DoubleValuesSource valueSource)
throws IOException {
this(
new DocValuesOrdinalsReader(FacetsConfig.DEFAULT_INDEX_FIELD_NAME),
taxoReader,
config,
fc,
valueSource);
this(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, taxoReader, config, fc, valueSource);
}
/**
* Aggregates double facet values from the provided {@link DoubleValuesSource}, pulling ordinals
* from the specified indexed facet field.
*/
public TaxonomyFacetSumValueSource(
String indexField,
TaxonomyReader taxoReader,
FacetsConfig config,
FacetsCollector fc,
DoubleValuesSource valueSource)
throws IOException {
super(indexField, taxoReader, config);
ordinalsReader = null;
sumValues(fc.getMatchingDocs(), fc.getKeepScores(), valueSource);
}
/**
* Aggregates float facet values from the provided {@link DoubleValuesSource}, and pulls ordinals
* from the provided {@link OrdinalsReader}.
*
* @deprecated Custom binary encodings for taxonomy ordinals are no longer supported starting with
* Lucene 9
*/
@Deprecated
public TaxonomyFacetSumValueSource(
OrdinalsReader ordinalsReader,
TaxonomyReader taxoReader,
@@ -91,20 +108,47 @@ public class TaxonomyFacetSumValueSource extends FloatTaxonomyFacets {
List<MatchingDocs> matchingDocs, boolean keepScores, DoubleValuesSource valueSource)
throws IOException {
IntsRef scratch = new IntsRef();
for (MatchingDocs hits : matchingDocs) {
OrdinalsReader.OrdinalsSegmentReader ords = ordinalsReader.getReader(hits.context);
DoubleValues scores = keepScores ? scores(hits) : null;
DoubleValues functionValues = valueSource.getValues(hits.context, scores);
DocIdSetIterator docs = hits.bits.iterator();
if (ordinalsReader != null) {
// If the user provided a custom ordinals reader, use it to retrieve the document ordinals:
IntsRef scratch = new IntsRef();
for (MatchingDocs hits : matchingDocs) {
OrdinalsReader.OrdinalsSegmentReader ords = ordinalsReader.getReader(hits.context);
DoubleValues scores = keepScores ? scores(hits) : null;
DoubleValues functionValues = valueSource.getValues(hits.context, scores);
DocIdSetIterator docs = hits.bits.iterator();
int doc;
while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
ords.get(doc, scratch);
if (functionValues.advanceExact(doc)) {
float value = (float) functionValues.doubleValue();
for (int i = 0; i < scratch.length; i++) {
values[scratch.ints[i]] += value;
int doc;
while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
ords.get(doc, scratch);
if (functionValues.advanceExact(doc)) {
float value = (float) functionValues.doubleValue();
for (int i = 0; i < scratch.length; i++) {
values[scratch.ints[i]] += value;
}
}
}
}
} else {
// If no custom ordinals reader is provided, expect the default encoding:
for (MatchingDocs hits : matchingDocs) {
SortedNumericDocValues ordinalValues =
FacetUtils.loadOrdinalValues(hits.context.reader(), indexFieldName);
if (ordinalValues == null) {
continue;
}
DoubleValues scores = keepScores ? scores(hits) : null;
DoubleValues functionValues = valueSource.getValues(hits.context, scores);
DocIdSetIterator it =
ConjunctionUtils.intersectIterators(List.of(hits.bits.iterator(), ordinalValues));
for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
if (functionValues.advanceExact(doc)) {
float value = (float) functionValues.doubleValue();
int ordinalCount = ordinalValues.docValueCount();
for (int i = 0; i < ordinalCount; i++) {
values[(int) ordinalValues.nextValue()] += value;
}
}
}
}
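The default-encoding branch above is what the non-deprecated constructors drive. A usage sketch, with the searcher, taxonomy reader and config assumed; a constant 1.0 per hit makes the sum equivalent to a count:

```java
import java.io.IOException;
import org.apache.lucene.facet.FacetResult;
import org.apache.lucene.facet.Facets;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.search.DoubleValuesSource;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;

class SumValueSourceSketch {
  static FacetResult sum(IndexSearcher searcher, TaxonomyReader taxoReader, FacetsConfig config)
      throws IOException {
    FacetsCollector fc = new FacetsCollector();
    FacetsCollector.search(searcher, new MatchAllDocsQuery(), 10, fc);
    Facets facets =
        new TaxonomyFacetSumValueSource(taxoReader, config, fc, DoubleValuesSource.constant(1d));
    return facets.getTopChildren(10, "Author"); // dimension name is illustrative
  }
}
```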

@@ -97,4 +97,18 @@ public interface TaxonomyWriter extends Closeable, TwoPhaseCommit {
/** Returns the commit user data iterable that was set on {@link #setLiveCommitData(Iterable)}. */
public Iterable<Map.Entry<String, String>> getLiveCommitData();
/**
* Determine whether to store taxonomy ordinals for each document using the older binary
* format or the newer SortedNumericDocValues format, based on the version used to create the
* index.
*
* @deprecated Please don't rely on this method as it will be removed in Lucene 10. It's being
* introduced temporarily to support backwards-compatibility with Lucene 8 and earlier index
* formats.
*/
@Deprecated
default boolean useNumericDocValuesForOrdinals() {
return false;
}
}

@@ -162,7 +162,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
openMode = config.getOpenMode();
if (DirectoryReader.indexExists(directory) == false) {
indexEpoch = 1;
// no commit exists so we can safely use the new BinaryDocValues field
// no commit exists so we can safely use the newer formats:
useOlderFormat = false;
} else {
String epochStr = null;
@@ -1005,4 +1005,9 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
public final long getTaxonomyEpoch() {
return indexEpoch;
}
@Override
public boolean useNumericDocValuesForOrdinals() {
return useOlderFormat == false;
}
}

@@ -190,7 +190,7 @@ public class TestMultipleIndexFields extends FacetTestCase {
private void assertOrdinalsExist(String field, IndexReader ir) throws IOException {
for (LeafReaderContext context : ir.leaves()) {
LeafReader r = context.reader();
if (r.getBinaryDocValues(field) != null) {
if (r.getSortedNumericDocValues(field) != null) {
return; // not all segments must have this DocValues
}
}

@@ -0,0 +1,140 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.facet.taxonomy;
import com.carrotsearch.randomizedtesting.generators.RandomNumbers;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.LuceneTestCase;
public class TestBackCompatSortedNumericDocValues extends LuceneTestCase {
private static class FacetsConfigWrapper extends FacetsConfig {
public BytesRef encodeValues(IntsRef values) {
return dedupAndEncode(values);
}
}
public void testRandom() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
// sorta big scratch so we don't have to think about reallocating:
IntsRef scratch = new IntsRef(100);
// used to access default binary encoding easily:
FacetsConfigWrapper facetsConfig = new FacetsConfigWrapper();
// keep track of the values we expect to see for each doc:
Map<String, List<Integer>> expectedValues = new HashMap<>();
int numDocs = atLeast(100);
for (int i = 0; i < numDocs; i++) {
int numValues = RandomNumbers.randomIntBetween(random(), 1, 50);
scratch.length = 0;
scratch.offset = 0;
Set<Integer> values = new HashSet<>();
for (int j = 0; j < numValues; j++) {
int value = random().nextInt(Integer.MAX_VALUE);
values.add(value);
// we might have dups in here, which is fine (encoding takes care of deduping and sorting):
scratch.ints[j] = value;
scratch.length++;
}
// we expect to get sorted and deduped values back out:
expectedValues.put(String.valueOf(i), values.stream().sorted().collect(Collectors.toList()));
Document doc = new Document();
doc.add(new StoredField("id", String.valueOf(i)));
doc.add(new BinaryDocValuesField("bdv", facetsConfig.encodeValues(scratch)));
writer.addDocument(doc);
}
writer.forceMerge(1);
writer.commit();
IndexReader reader = writer.getReader();
IndexSearcher searcher = newSearcher(reader);
writer.close();
assert reader.leaves().size() == 1;
BinaryDocValues binaryDocValues = reader.leaves().get(0).reader().getBinaryDocValues("bdv");
assertNotNull(binaryDocValues);
SortedNumericDocValues docValues = BackCompatSortedNumericDocValues.wrap(binaryDocValues, null);
TopFieldDocs docs = searcher.search(new MatchAllDocsQuery(), numDocs, Sort.INDEXORDER);
for (ScoreDoc scoreDoc : docs.scoreDocs) {
String id = reader.document(scoreDoc.doc).get("id");
int docId = scoreDoc.doc;
int doc;
if (random().nextBoolean()) {
doc = docValues.nextDoc();
} else {
if (random().nextBoolean()) {
doc = docValues.advance(docId);
} else {
assertTrue(docValues.advanceExact(docId));
doc = docId;
}
}
assertEquals(docId, doc);
assertEquals(docId, docValues.docID());
List<Integer> expected = expectedValues.get(id);
assertEquals(expected.size(), docValues.docValueCount());
checkValues(expected, docValues);
}
// Run off the end and make sure that case is handled gracefully:
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docValues.nextDoc());
IOUtils.close(reader, dir);
}
private void checkValues(List<Integer> expected, SortedNumericDocValues values)
throws IOException {
for (Integer e : expected) {
assertEquals((long) e, values.nextValue());
}
}
}

@@ -410,9 +410,15 @@ public class TestTaxonomyFacetSumValueSource extends FacetTestCase {
FacetsCollector.search(newSearcher(r), new MatchAllDocsQuery(), 10, fc);
Facets facets1 = getTaxonomyFacetCounts(taxoReader, config, fc);
Facets facets2 =
new TaxonomyFacetSumValueSource(
new DocValuesOrdinalsReader("$b"), taxoReader, config, fc, DoubleValuesSource.SCORES);
Facets facets2;
if (random().nextBoolean()) {
facets2 =
new TaxonomyFacetSumValueSource(
new DocValuesOrdinalsReader("$b"), taxoReader, config, fc, DoubleValuesSource.SCORES);
} else {
facets2 =
new TaxonomyFacetSumValueSource("$b", taxoReader, config, fc, DoubleValuesSource.SCORES);
}
assertEquals(r.maxDoc(), facets1.getTopChildren(10, "a").value.intValue());
assertEquals(r.maxDoc(), facets2.getTopChildren(10, "b").value.doubleValue(), 1E-10);

@@ -20,10 +20,32 @@ import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.lucene.document.Document;
import org.apache.lucene.facet.DrillDownQuery;
import org.apache.lucene.facet.FacetField;
import org.apache.lucene.facet.FacetResult;
import org.apache.lucene.facet.Facets;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.taxonomy.DocValuesOrdinalsReader;
import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.facet.taxonomy.FastTaxonomyFacetCounts;
import org.apache.lucene.facet.taxonomy.TaxonomyFacetCounts;
import org.apache.lucene.facet.taxonomy.TaxonomyFacetLabels;
import org.apache.lucene.facet.taxonomy.TaxonomyFacetSumValueSource;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.search.DoubleValuesSource;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.junit.Ignore;
@@ -49,50 +71,196 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
//
// Then move the zip file to your trunk checkout and use it in your test cases
public static final String oldTaxonomyIndexName = "taxonomy.8.10.0-cfs";
private static final String OLD_TAXONOMY_INDEX_NAME = "taxonomy.8.11.0-cfs";
private static final String OLD_INDEX_NAME = "index.8.11.0-cfs";
public void testCreateNewTaxonomy() throws IOException {
createNewTaxonomyIndex(oldTaxonomyIndexName);
createNewTaxonomyIndex(OLD_TAXONOMY_INDEX_NAME, OLD_INDEX_NAME);
}
// Opens up a pre-existing old taxonomy index and adds new BinaryDocValues based fields
private void createNewTaxonomyIndex(String dirName) throws IOException {
Path indexDir = createTempDir(oldTaxonomyIndexName);
TestUtil.unzip(getDataInputStream(dirName + ".zip"), indexDir);
Directory dir = newFSDirectory(indexDir);
/**
* This test exercises a bunch of different faceting operations, plus direct taxonomy index
* reads, to make sure the more modern faceting formats introduced in 9.0 are backwards-compatible
* with 8.x indexes. It requires an "older" 8.x index to be in place with assumed docs/categories
* already present. It makes sure it can still run a number of different "read" operations against
* the old index, then it writes new content, forces a merge and does a bunch more "read"
* operations. It may seem a bit chaotic, but it's trying to test a number of different
* faceting-related implementations that require specific back-compat support.
*/
private void createNewTaxonomyIndex(String taxoDirName, String indexDirName) throws IOException {
Path taxoPath = createTempDir(taxoDirName);
Path indexPath = createTempDir(indexDirName);
DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir);
TestUtil.unzip(getDataInputStream(taxoDirName + ".zip"), taxoPath);
TestUtil.unzip(getDataInputStream(indexDirName + ".zip"), indexPath);
Directory taxoDir = newFSDirectory(taxoPath);
Directory indexDir = newFSDirectory(indexPath);
// Open the existing indexes (explicitly open in APPEND mode and fail if they don't exist,
// since we're trying to test back-compat with existing indexes):
DirectoryTaxonomyWriter taxoWriter =
new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.APPEND);
IndexWriterConfig indexWriterConfig = newIndexWriterConfig();
indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
RandomIndexWriter indexWriter =
new RandomIndexWriter(random(), indexDir, indexWriterConfig, random().nextBoolean());
// Use a default FacetsConfig. This assumes that we didn't need to register anything
// interesting when creating the older format index. If that changes, we need a way to
// ensure we re-use the same facet configuration used in creating the old format
// taxonomy index:
FacetsConfig facetsConfig = new FacetsConfig();
// At this point we should have a taxonomy index and "regular" index containing some
// taxonomy categories and documents with facet ordinals indexed. Confirm that we can
// facet and search against it as-is before adding anything new. Of course these tests
// are highly dependent on the index we're starting with, so they will need to be
// updated accordingly if the "old" test index changes:
TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
IndexSearcher searcher = newSearcher(indexWriter.getReader());
FacetsCollector facetsCollector = new FacetsCollector();
searcher.search(new MatchAllDocsQuery(), facetsCollector);
// Test a few different facet implementations that we know have back-compat implications:
Facets facets = new FastTaxonomyFacetCounts(taxoReader, facetsConfig, facetsCollector);
FacetResult facetResult = facets.getTopChildren(10, "f1");
assertEquals(2, facetResult.value);
facets =
new TaxonomyFacetCounts(
new DocValuesOrdinalsReader(), taxoReader, facetsConfig, facetsCollector);
facetResult = facets.getTopChildren(10, "f1");
assertEquals(2, facetResult.value);
facets =
new TaxonomyFacetSumValueSource(
taxoReader, facetsConfig, facetsCollector, DoubleValuesSource.constant(1d));
facetResult = facets.getTopChildren(10, "f1");
assertEquals(2.0f, facetResult.value);
// Test that we can drill-down as expected (and read facet labels from matching docs):
TaxonomyFacetLabels facetLabels =
new TaxonomyFacetLabels(taxoReader, FacetsConfig.DEFAULT_INDEX_FIELD_NAME);
assert (searcher.getIndexReader().leaves().size() == 1);
TaxonomyFacetLabels.FacetLabelReader labelReader =
facetLabels.getFacetLabelReader(searcher.getIndexReader().leaves().get(0));
DrillDownQuery query = new DrillDownQuery(facetsConfig, new MatchAllDocsQuery());
query.add("f1", "foo");
TopDocs docResults = searcher.search(query, 10);
assertEquals(1, docResults.totalHits.value);
int docId = docResults.scoreDocs[0].doc;
Set<FacetLabel> labels = new HashSet<>();
for (FacetLabel label = labelReader.nextFacetLabel(docId);
label != null;
label = labelReader.nextFacetLabel(docId)) {
labels.add(label);
}
assertEquals(2, labels.size());
assertTrue(
labels.containsAll(List.of(new FacetLabel("f1", "foo"), new FacetLabel("f2", "foo"))));
assertEquals(0, docResults.scoreDocs[0].doc);
// And make sure we can read directly from the taxonomy like we'd expect:
int ord = taxoReader.getOrdinal(new FacetLabel("f1", "foo"));
assertNotEquals(TaxonomyReader.INVALID_ORDINAL, ord);
assertNotNull(taxoReader.getPath(ord));
// Now we'll add some new docs and taxonomy categories, force merge (to make sure that
// goes well) and then do some more searches, etc.:
Document doc = new Document();
doc.add(new FacetField("f1", "zed"));
indexWriter.addDocument(facetsConfig.build(taxoWriter, doc));
FacetLabel cp_c = new FacetLabel("c");
writer.addCategory(cp_c);
writer.getInternalIndexWriter().forceMerge(1);
writer.commit();
taxoWriter.addCategory(cp_c);
TaxonomyReader reader = new DirectoryTaxonomyReader(writer);
indexWriter.forceMerge(1);
taxoWriter.getInternalIndexWriter().forceMerge(1);
indexWriter.commit();
taxoWriter.commit();
int ord1 = reader.getOrdinal(new FacetLabel("a"));
assert ord1 != TaxonomyReader.INVALID_ORDINAL;
// Just asserting ord1 != TaxonomyReader.INVALID_ORDINAL is not enough to check compatibility
assertNotNull(reader.getPath(ord1));
IOUtils.close(taxoReader, searcher.getIndexReader());
taxoReader = new DirectoryTaxonomyReader(taxoWriter);
searcher = newSearcher(indexWriter.getReader());
IOUtils.close(indexWriter, taxoWriter);
int ord2 = reader.getOrdinal(new FacetLabel("b"));
assert ord2 != TaxonomyReader.INVALID_ORDINAL;
// Re-test a number of different use-cases, which should now "see" the newly added content:
facetsCollector = new FacetsCollector();
searcher.search(new MatchAllDocsQuery(), facetsCollector);
facets = new FastTaxonomyFacetCounts(taxoReader, facetsConfig, facetsCollector);
facetResult = facets.getTopChildren(10, "f1");
assertEquals(3, facetResult.value);
facets =
new TaxonomyFacetCounts(
new DocValuesOrdinalsReader(), taxoReader, facetsConfig, facetsCollector);
facetResult = facets.getTopChildren(10, "f1");
assertEquals(3, facetResult.value);
facets =
new TaxonomyFacetSumValueSource(
taxoReader, facetsConfig, facetsCollector, DoubleValuesSource.constant(1d));
facetResult = facets.getTopChildren(10, "f1");
assertEquals(3.0f, facetResult.value);
// Test that we can drill-down as expected, and access facet labels:
facetLabels = new TaxonomyFacetLabels(taxoReader, FacetsConfig.DEFAULT_INDEX_FIELD_NAME);
assert (searcher.getIndexReader().leaves().size() == 1);
labelReader = facetLabels.getFacetLabelReader(searcher.getIndexReader().leaves().get(0));
query = new DrillDownQuery(facetsConfig, new MatchAllDocsQuery());
query.add("f1", "foo");
docResults = searcher.search(query, 10);
assertEquals(1, docResults.totalHits.value);
docId = docResults.scoreDocs[0].doc;
labels = new HashSet<>();
for (FacetLabel label = labelReader.nextFacetLabel(docId);
label != null;
label = labelReader.nextFacetLabel(docId)) {
labels.add(label);
}
assertEquals(2, labels.size());
assertTrue(
labels.containsAll(List.of(new FacetLabel("f1", "foo"), new FacetLabel("f2", "foo"))));
labelReader = facetLabels.getFacetLabelReader(searcher.getIndexReader().leaves().get(0));
query = new DrillDownQuery(facetsConfig, new MatchAllDocsQuery());
query.add("f1", "zed");
docResults = searcher.search(query, 10);
assertEquals(1, docResults.totalHits.value);
docId = docResults.scoreDocs[0].doc;
labels = new HashSet<>();
for (FacetLabel label = labelReader.nextFacetLabel(docId);
label != null;
label = labelReader.nextFacetLabel(docId)) {
labels.add(label);
}
assertEquals(1, labels.size());
assertTrue(labels.contains(new FacetLabel("f1", "zed")));
// And make sure we can read directly from the taxonomy like we'd expect:
ord = taxoReader.getOrdinal(new FacetLabel("f1", "foo"));
assertNotEquals(TaxonomyReader.INVALID_ORDINAL, ord);
assertNotNull(taxoReader.getPath(ord));
ord = taxoReader.getOrdinal(new FacetLabel("f1", "zed"));
assertNotEquals(TaxonomyReader.INVALID_ORDINAL, ord);
assertNotNull(taxoReader.getPath(ord));
// And check a few more direct reads from the taxonomy:
ord = taxoReader.getOrdinal(new FacetLabel("a"));
assertNotEquals(TaxonomyReader.INVALID_ORDINAL, ord);
assertNotNull(taxoReader.getPath(ord));
ord = taxoReader.getOrdinal(new FacetLabel("b"));
assertNotEquals(TaxonomyReader.INVALID_ORDINAL, ord);
// Just asserting ord2 != TaxonomyReader.INVALID_ORDINAL is not enough to check compatibility
assertNotNull(reader.getPath(ord2));
assertNotNull(taxoReader.getPath(ord));
int ord3 = reader.getOrdinal(cp_c);
assert ord3 != TaxonomyReader.INVALID_ORDINAL;
assertNotNull(reader.getPath(ord3));
ord = taxoReader.getOrdinal(cp_c);
assertNotEquals(TaxonomyReader.INVALID_ORDINAL, ord);
assertNotNull(taxoReader.getPath(ord));
reader.close();
writer.close();
dir.close();
IOUtils.close(taxoReader, searcher.getIndexReader(), taxoDir, indexDir);
}
// Opens up a pre-existing index and tries to run getBulkPath on it
public void testGetBulkPathOnOlderCodec() throws Exception {
Path indexDir = createTempDir(oldTaxonomyIndexName);
TestUtil.unzip(getDataInputStream(oldTaxonomyIndexName + ".zip"), indexDir);
Path indexDir = createTempDir(OLD_TAXONOMY_INDEX_NAME);
TestUtil.unzip(getDataInputStream(OLD_TAXONOMY_INDEX_NAME + ".zip"), indexDir);
Directory dir = newFSDirectory(indexDir);
DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir);
@@ -114,21 +282,41 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
// Used to create a fresh taxonomy index with StoredFields
@Ignore
public void testCreateOldTaxonomy() throws IOException {
createOldTaxonomyIndex(oldTaxonomyIndexName);
createOldTaxonomyIndex(OLD_TAXONOMY_INDEX_NAME, OLD_INDEX_NAME);
}
private void createOldTaxonomyIndex(String dirName) throws IOException {
Path indexDir = getIndexDir().resolve(dirName);
Files.deleteIfExists(indexDir);
Directory dir = newFSDirectory(indexDir);
private void createOldTaxonomyIndex(String taxoDirName, String indexDirName) throws IOException {
Path taxoPath = getIndexDir().resolve(taxoDirName);
Path indexPath = getIndexDir().resolve(indexDirName);
Files.deleteIfExists(taxoPath);
Files.deleteIfExists(indexPath);
TaxonomyWriter writer = new DirectoryTaxonomyWriter(dir);
Directory taxoDir = newFSDirectory(taxoPath);
Directory indexDir = newFSDirectory(indexPath);
writer.addCategory(new FacetLabel("a"));
writer.addCategory(new FacetLabel("b"));
writer.commit();
writer.close();
dir.close();
TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
FacetsConfig facetsConfig = new FacetsConfig();
RandomIndexWriter indexWriter = new RandomIndexWriter(random(), indexDir);
Document doc = new Document();
doc.add(new FacetField("f1", "foo"));
doc.add(new FacetField("f2", "foo"));
indexWriter.addDocument(facetsConfig.build(taxoWriter, doc));
doc = new Document();
doc.add(new FacetField("f1", "bar"));
indexWriter.addDocument(facetsConfig.build(taxoWriter, doc));
doc = new Document();
doc.add(new FacetField("f2", "bar"));
indexWriter.addDocument(facetsConfig.build(taxoWriter, doc));
taxoWriter.addCategory(new FacetLabel("a"));
taxoWriter.addCategory(new FacetLabel("b"));
indexWriter.commit();
taxoWriter.commit();
IOUtils.close(indexWriter, taxoWriter, indexDir, taxoDir);
}
private Path getIndexDir() {