Add TaxonomyReader#getBulkOrdinals method (#12180) (#12769)

2023-11-09 15:02:02 +00:00 · 2023-11-09 15:02:02 +00:00 · fb5f491643
parent 570832eb74
commit fb5f491643
4 changed files with 311 additions and 75 deletions
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@ -67,6 +67,9 @@ API Changes

 * GITHUB#12735: Remove FSTCompiler#getTermCount() and FSTCompiler.UnCompiledNode#inputCount (Anh Dung Bui)

+* GITHUB#12180: Add TaxonomyReader#getBulkOrdinals method to more efficiently retrieve facet ordinals for multiple
+  FacetLabels at once. (Egor Potemkin)
+
 New Features
 ---------------------

--- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyReader.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyReader.java
@ -201,6 +201,28 @@ public abstract class TaxonomyReader implements Closeable {
   */
  public abstract int getOrdinal(FacetLabel categoryPath) throws IOException;

+  /**
+   * Resolves several category paths to their ordinals in a single call. An ordinal is a category's
+   * serial number: an integer which starts with 0 and grows as more categories are added (once a
+   * category is added, it can never be deleted).
+   *
+   * <p>The implementation in {@link
+   * org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader} is generally faster than
+   * calling {@link #getOrdinal(FacetLabel)} once per path.
+   *
+   * @param categoryPath the category paths to look up
+   * @return an array parallel to {@code categoryPath}: element {@code i} is the ordinal of {@code
+   *     categoryPath[i]}, or {@link #INVALID_ORDINAL} if that category wasn't found.
+   * @throws IOException if a lookup fails with an I/O error
+   */
+  public int[] getBulkOrdinals(FacetLabel... categoryPath) throws IOException {
+    // Slow default implementation: one getOrdinal call per path. DirectoryTaxonomyReader
+    // overrides this method to make it faster.
+    final int[] ordinals = new int[categoryPath.length];
+    int next = 0;
+    for (FacetLabel label : categoryPath) {
+      ordinals[next++] = getOrdinal(label);
+    }
+    return ordinals;
+  }
+
  /** Returns ordinal for the dim + path. */
  public int getOrdinal(String dim, String... path) throws IOException {
    String[] fullPath = new String[path.length + 1];
@ -218,6 +240,9 @@ public abstract class TaxonomyReader implements Closeable {
   * <p>The implementation in {@link
   * org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader} is generally faster than
   * the default implementation which iteratively calls {@link #getPath(int)}
+   *
+   * <p>Note: this method may reorder the elements of the array passed to it. Callers should not
+   * rely on the order of that array after the method is called.
   */
  public FacetLabel[] getBulkPath(int... ordinals) throws IOException {
    FacetLabel[] facetLabels = new FacetLabel[ordinals.length];
--- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java
@ -38,14 +38,19 @@ import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.MultiTerms;
 import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.ReaderUtil;
+import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.Accountable;
 import org.apache.lucene.util.Accountables;
+import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefBuilder;
+import org.apache.lucene.util.BytesRefComparator;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.InPlaceMergeSorter;
 import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.StringSorter;

 /**
 * A {@link TaxonomyReader} which retrieves stored taxonomy information from a {@link Directory}.
@ -71,6 +76,11 @@ public class DirectoryTaxonomyReader extends TaxonomyReader implements Accountab
  private final long taxoEpoch; // used in doOpenIfChanged
  private final DirectoryReader indexReader;

+  // We only store the fact that a category exists, not otherwise.
+  // This is required because the caches are shared with new DTR instances
+  // that are allocated from doOpenIfChanged. Therefore, if we only store
+  // information about found categories, we cannot accidentally tell a new
+  // generation of DTR that a category does not exist.
  // TODO: test DoubleBarrelLRUCache and consider using it instead
  private LRUHashMap<FacetLabel, Integer> ordinalCache;
  private LRUHashMap<Integer, FacetLabel> categoryCache;
@ -298,12 +308,6 @@ public class DirectoryTaxonomyReader extends TaxonomyReader implements Accountab
            0);
    if (docs != null && docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
      ret = docs.docID();
-
-      // We only store the fact that a category exists, not otherwise.
-      // This is required because the caches are shared with new DTR instances
-      // that are allocated from doOpenIfChanged. Therefore, if we only store
-      // information about found categories, we cannot accidentally tell a new
-      // generation of DTR that a category does not exist.
      synchronized (ordinalCache) {
        ordinalCache.put(cp, ret);
      }
@ -312,6 +316,117 @@ public class DirectoryTaxonomyReader extends TaxonomyReader implements Accountab
    return ret;
  }

+  /**
+   * Bulk variant of {@link #getOrdinal(FacetLabel)}. Each path is first looked up in the shared
+   * ordinal cache; the paths that are not cached are converted to terms, sorted, and resolved with
+   * one forward-only pass over the terms dictionary. Ordinals found that way are added to the
+   * cache; misses are never cached.
+   *
+   * @param categoryPaths the category paths to look up; the array itself is not modified
+   * @return an array parallel to {@code categoryPaths} holding each path's ordinal, or {@link
+   *     #INVALID_ORDINAL} for a path this reader does not recognize
+   * @throws IOException if reading the taxonomy index fails
+   */
+  @Override
+  public int[] getBulkOrdinals(FacetLabel... categoryPaths) throws IOException {
+    ensureOpen();
+    if (categoryPaths.length == 0) {
+      return new int[0];
+    }
+    if (categoryPaths.length == 1) {
+      return new int[] {getOrdinal(categoryPaths[0])};
+    }
+    // First try to find results in the cache:
+    int[] result = new int[categoryPaths.length];
+    int[] indexesMissingFromCache = new int[10]; // initial size, will grow when required
+    int numberOfMissingFromCache = 0;
+    FacetLabel cp;
+    Integer res;
+    for (int i = 0; i < categoryPaths.length; i++) {
+      cp = categoryPaths[i];
+      synchronized (ordinalCache) {
+        res = ordinalCache.get(cp);
+      }
+      if (res != null) {
+        if (res < indexReader.maxDoc()) {
+          // Since the cache is shared with DTR instances allocated from
+          // doOpenIfChanged, we need to ensure that the ordinal is one that
+          // this DTR instance recognizes.
+          result[i] = res;
+        } else {
+          // if we get here, it means that the category was found in the cache,
+          // but is not recognized by this TR instance. Therefore, there's no
+          // need to continue search for the path on disk, because we won't find
+          // it there too.
+          result[i] = TaxonomyReader.INVALID_ORDINAL;
+        }
+      } else {
+        indexesMissingFromCache =
+            ArrayUtil.grow(indexesMissingFromCache, numberOfMissingFromCache + 1);
+        indexesMissingFromCache[numberOfMissingFromCache++] = i;
+      }
+    }
+    // All ordinals found in cache. The check must use the miss counter: the index array is
+    // pre-sized (and over-allocated by ArrayUtil.grow), so its length is never 0.
+    if (numberOfMissingFromCache == 0) {
+      return result;
+    }
+
+    // If we're still here, we have at least one cache miss. We need to fetch the
+    // value from disk, and then also put results in the cache
+
+    // Create array of missing terms, and sort them so that later we scan terms dictionary
+    // forward-only.
+    // Note: similar functionality exists within BytesRefHash and BytesRefArray, but they don't
+    // reuse BytesRefs and assign their own ords. It is cheaper to have custom implementation here.
+    BytesRef[] termsToGet = new BytesRef[numberOfMissingFromCache];
+    for (int i = 0; i < termsToGet.length; i++) {
+      cp = categoryPaths[indexesMissingFromCache[i]];
+      termsToGet[i] = new BytesRef(FacetsConfig.pathToString(cp.components, cp.length));
+    }
+    // sort both terms and their indexes in the input parameter
+    // (effectively-final alias, needed because indexesMissingFromCache was reassigned above)
+    int[] finalMissingFromCache = indexesMissingFromCache;
+
+    new StringSorter(BytesRefComparator.NATURAL) {
+
+      @Override
+      protected void swap(int i, int j) {
+        // keep the two parallel arrays aligned: slot k of both always describes the same path
+        int tmp = finalMissingFromCache[i];
+        finalMissingFromCache[i] = finalMissingFromCache[j];
+        finalMissingFromCache[j] = tmp;
+        BytesRef tmpBytes = termsToGet[i];
+        termsToGet[i] = termsToGet[j];
+        termsToGet[j] = tmpBytes;
+      }
+
+      @Override
+      protected void get(BytesRefBuilder builder, BytesRef result, int i) {
+        // point the reusable BytesRef at the stored term instead of copying its bytes
+        BytesRef ref = termsToGet[i];
+        result.offset = ref.offset;
+        result.length = ref.length;
+        result.bytes = ref.bytes;
+      }
+    }.sort(0, numberOfMissingFromCache);
+
+    TermsEnum te = MultiTerms.getTerms(indexReader, Consts.FULL).iterator();
+    PostingsEnum postings = null;
+    int ord;
+    int resIndex;
+    for (int i = 0; i < numberOfMissingFromCache; i++) {
+      // position of this term's path in the caller's array, i.e. where its result belongs
+      resIndex = indexesMissingFromCache[i];
+      if (te.seekExact(termsToGet[i])) {
+        postings = te.postings(postings, 0);
+        if (postings != null && postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
+          ord = postings.docID();
+          result[resIndex] = ord;
+        } else {
+          result[resIndex] = INVALID_ORDINAL;
+        }
+      } else {
+        result[resIndex] = INVALID_ORDINAL;
+      }
+    }
+    // populate cache (found categories only, under a single lock acquisition)
+    synchronized (ordinalCache) {
+      for (int i = 0; i < numberOfMissingFromCache; i++) {
+        resIndex = indexesMissingFromCache[i];
+        ord = result[resIndex];
+        if (ord != INVALID_ORDINAL) {
+          ordinalCache.put(categoryPaths[resIndex], ord);
+        }
+      }
+    }
+    return result;
+  }
+
  @Override
  public FacetLabel getPath(int ordinal) throws IOException {
    ensureOpen();
--- a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java
+++ b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java
@ -16,13 +16,19 @@
 */
 package org.apache.lucene.facet.taxonomy.directory;

+import static org.apache.lucene.facet.taxonomy.TaxonomyReader.INVALID_ORDINAL;
+
 import com.carrotsearch.randomizedtesting.RandomizedTest;
 import com.carrotsearch.randomizedtesting.generators.RandomNumbers;
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collections;
 import java.util.HashSet;
+import java.util.List;
 import java.util.Random;
 import java.util.Set;
+import java.util.stream.IntStream;
 import org.apache.lucene.facet.FacetTestCase;
 import org.apache.lucene.facet.taxonomy.FacetLabel;
 import org.apache.lucene.facet.taxonomy.TaxonomyReader;
@ -42,6 +48,9 @@ import org.junit.Test;

 public class TestDirectoryTaxonomyReader extends FacetTestCase {

+  // Sentinel compared by identity (==): marks an ordinal for which getPath / getBulkPath is
+  // expected to throw IllegalArgumentException.
+  private static final FacetLabel ILLEGAL_PATH =
+      new FacetLabel("PATH_THAT_CAUSED_IllegalArgumentException");
+
  @Test
  public void testCloseAfterIncRef() throws Exception {
    Directory dir = newDirectory();
@ -356,8 +365,7 @@ public class TestDirectoryTaxonomyReader extends FacetTestCase {

    DirectoryTaxonomyReader r1 = new DirectoryTaxonomyReader(dir);
    // fill r1's caches
-    assertEquals(1, r1.getOrdinal(cp_a));
-    assertEquals(cp_a, r1.getPath(1));
+    assertPathsAndOrdinals(r1, new int[] {1}, new FacetLabel[] {cp_a});

    // now recreate, add a different category
    writer = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE);
@ -369,16 +377,15 @@ public class TestDirectoryTaxonomyReader extends FacetTestCase {
    assertNotNull(r2);

    // fill r2's caches
-    assertEquals(1, r2.getOrdinal(cp_b));
-    assertEquals(cp_b, r2.getPath(1));
+    assertPathsAndOrdinals(r2, new int[] {1}, new FacetLabel[] {cp_b});

    // check that r1 doesn't see cp_b
-    assertEquals(TaxonomyReader.INVALID_ORDINAL, r1.getOrdinal(cp_b));
-    assertEquals(cp_a, r1.getPath(1));
+    assertGettingOrdinals(r1, new int[] {1, INVALID_ORDINAL}, new FacetLabel[] {cp_a, cp_b});
+    assertGettingPaths(r1, new FacetLabel[] {cp_a, ILLEGAL_PATH}, new int[] {1, 2});

    // check that r2 doesn't see cp_a
-    assertEquals(TaxonomyReader.INVALID_ORDINAL, r2.getOrdinal(cp_a));
-    assertEquals(cp_b, r2.getPath(1));
+    assertGettingOrdinals(r2, new int[] {INVALID_ORDINAL, 1}, new FacetLabel[] {cp_a, cp_b});
+    assertGettingPaths(r2, new FacetLabel[] {cp_b, ILLEGAL_PATH}, new int[] {1, 2});

    r2.close();
    r1.close();
@ -399,8 +406,7 @@ public class TestDirectoryTaxonomyReader extends FacetTestCase {
      DirectoryTaxonomyReader r1 =
          nrt ? new DirectoryTaxonomyReader(writer) : new DirectoryTaxonomyReader(dir);
      // fill r1's caches
-      assertEquals(1, r1.getOrdinal(cp_a));
-      assertEquals(cp_a, r1.getPath(1));
+      assertPathsAndOrdinals(r1, new int[] {1}, new FacetLabel[] {cp_a});

      FacetLabel cp_b = new FacetLabel("b");
      writer.addCategory(cp_b);
@ -410,12 +416,11 @@ public class TestDirectoryTaxonomyReader extends FacetTestCase {
      assertNotNull(r2);

      // add r2's categories to the caches
-      assertEquals(2, r2.getOrdinal(cp_b));
-      assertEquals(cp_b, r2.getPath(2));
+      assertPathsAndOrdinals(r2, new int[] {1, 2}, new FacetLabel[] {cp_a, cp_b});

      // check that r1 doesn't see cp_b
-      assertEquals(TaxonomyReader.INVALID_ORDINAL, r1.getOrdinal(cp_b));
-      expectThrows(IllegalArgumentException.class, () -> r1.getPath(2));
+      assertGettingOrdinals(r1, new int[] {1, INVALID_ORDINAL}, new FacetLabel[] {cp_a, cp_b});
+      assertGettingPaths(r1, new FacetLabel[] {cp_a, ILLEGAL_PATH}, new int[] {1, 2});

      r1.close();
      r2.close();
@ -445,8 +450,7 @@ public class TestDirectoryTaxonomyReader extends FacetTestCase {
      DirectoryTaxonomyReader r1 =
          nrt ? new DirectoryTaxonomyReader(writer) : new DirectoryTaxonomyReader(dir);
      // fill r1's caches
-      assertEquals(1, r1.getOrdinal(cp_a));
-      assertEquals(cp_a, r1.getPath(1));
+      assertPathsAndOrdinals(r1, new int[] {1}, new FacetLabel[] {cp_a});

      // now replace taxonomy
      writer.replaceTaxonomy(src);
@ -456,16 +460,15 @@ public class TestDirectoryTaxonomyReader extends FacetTestCase {
      assertNotNull(r2);

      // fill r2's caches
-      assertEquals(1, r2.getOrdinal(cp_b));
-      assertEquals(cp_b, r2.getPath(1));
+      assertPathsAndOrdinals(r2, new int[] {1}, new FacetLabel[] {cp_b});

      // check that r1 doesn't see cp_b
-      assertEquals(TaxonomyReader.INVALID_ORDINAL, r1.getOrdinal(cp_b));
-      assertEquals(cp_a, r1.getPath(1));
+      assertGettingOrdinals(r1, new int[] {1, INVALID_ORDINAL}, new FacetLabel[] {cp_a, cp_b});
+      assertGettingPaths(r1, new FacetLabel[] {cp_a, ILLEGAL_PATH}, new int[] {1, 2});

      // check that r2 doesn't see cp_a
-      assertEquals(TaxonomyReader.INVALID_ORDINAL, r2.getOrdinal(cp_a));
-      assertEquals(cp_b, r2.getPath(1));
+      assertGettingOrdinals(r2, new int[] {INVALID_ORDINAL, 1}, new FacetLabel[] {cp_a, cp_b});
+      assertGettingPaths(r2, new FacetLabel[] {cp_b, ILLEGAL_PATH}, new int[] {1, 2});

      r2.close();
      r1.close();
@ -476,6 +479,86 @@ public class TestDirectoryTaxonomyReader extends FacetTestCase {
    src.close();
  }

+  private void assertGettingOrdinals(
+      DirectoryTaxonomyReader reader, int[] expectedOrds, FacetLabel[] sourcePaths)
+      throws IOException {
+    // Checks that reader maps every sourcePaths[i] to expectedOrds[i], both through getOrdinal
+    // and through getBulkOrdinals.
+    // To exercise mix of cache hit and cache misses for getOrdinal and getBulkOrdinals this method:
+    // 1. Randomly gets a few ords using sequential calls.
+    // 2. Call bulk get method.
+    // 3. Continue sequential calls for the remaining items.
+    assertEquals(expectedOrds.length, sourcePaths.length);
+    // Random#nextInt(bound) throws IllegalArgumentException for a non-positive bound, so an empty
+    // input skips the sequential phases and only exercises the bulk call.
+    int bulkOperationsIteration =
+        sourcePaths.length == 0 ? 0 : random().nextInt(sourcePaths.length);
+    List<Integer> indexesShuffled =
+        new ArrayList<>(IntStream.range(0, sourcePaths.length).boxed().toList());
+    Collections.shuffle(indexesShuffled, random());
+
+    // phase 1: sequential lookups for a random prefix of the shuffled indexes
+    for (int i = 0; i < bulkOperationsIteration; i++) {
+      int nextIndex = indexesShuffled.get(i);
+      assertEquals(expectedOrds[nextIndex], reader.getOrdinal(sourcePaths[nextIndex]));
+    }
+
+    // phase 2: bulk lookup of everything, partly cached by phase 1
+    int[] bulkOrdResult = reader.getBulkOrdinals(sourcePaths);
+    assertArrayEquals(expectedOrds, bulkOrdResult);
+
+    // phase 3: sequential lookups for the remaining indexes
+    for (int i = bulkOperationsIteration; i < sourcePaths.length; i++) {
+      int nextIndex = indexesShuffled.get(i);
+      assertEquals(expectedOrds[nextIndex], reader.getOrdinal(sourcePaths[nextIndex]));
+    }
+  }
+
+  private void assertGettingPaths(
+      DirectoryTaxonomyReader reader, FacetLabel[] expectedPaths, int[] sourceOrds)
+      throws IOException {
+    // Checks that reader maps every sourceOrds[i] to expectedPaths[i], both through getPath and
+    // through getBulkPath.
+    // To exercise mix of cache hit and cache misses for getPath and getBulkPath this method:
+    // 1. Randomly gets a few paths using sequential calls.
+    // 2. Call bulk get method.
+    // 3. Continue sequential calls for the remaining items.
+    // Note: expectedPaths should refer to ILLEGAL_PATH for ords from sourceOrds that are expected
+    // to throw IllegalArgumentException
+    assertEquals(expectedPaths.length, sourceOrds.length);
+    // Random#nextInt(bound) throws IllegalArgumentException for a non-positive bound, so an empty
+    // input skips the sequential phases and only exercises the bulk call.
+    int bulkOperationsIteration = sourceOrds.length == 0 ? 0 : random().nextInt(sourceOrds.length);
+    List<Integer> indexesShuffled =
+        new ArrayList<>(IntStream.range(0, sourceOrds.length).boxed().toList());
+    Collections.shuffle(indexesShuffled, random());
+
+    boolean illegalPathExceptionIsExpected =
+        Arrays.stream(expectedPaths).anyMatch(x -> x == ILLEGAL_PATH);
+    for (int i = 0; i < bulkOperationsIteration; i++) {
+      int nextIndex = indexesShuffled.get(i);
+      if (expectedPaths[nextIndex] == ILLEGAL_PATH) {
+        expectThrows(IllegalArgumentException.class, () -> reader.getPath(sourceOrds[nextIndex]));
+      } else {
+        assertEquals(expectedPaths[nextIndex], reader.getPath(sourceOrds[nextIndex]));
+      }
+    }
+
+    // Always hand getBulkPath a copy: it may reorder the elements of its argument, and sourceOrds
+    // has to stay aligned with expectedPaths for the sequential checks below.
+    int[] sourceOrdsCopy = sourceOrds.clone();
+    if (illegalPathExceptionIsExpected) {
+      expectThrows(IllegalArgumentException.class, () -> reader.getBulkPath(sourceOrdsCopy));
+    } else {
+      FacetLabel[] bulkPathsResult = reader.getBulkPath(sourceOrdsCopy);
+      assertArrayEquals(expectedPaths, bulkPathsResult);
+    }
+
+    for (int i = bulkOperationsIteration; i < sourceOrds.length; i++) {
+      int nextIndex = indexesShuffled.get(i);
+      if (expectedPaths[nextIndex] == ILLEGAL_PATH) {
+        expectThrows(IllegalArgumentException.class, () -> reader.getPath(sourceOrds[nextIndex]));
+      } else {
+        assertEquals(expectedPaths[nextIndex], reader.getPath(sourceOrds[nextIndex]));
+      }
+    }
+  }
+
+  private void assertPathsAndOrdinals(
+      DirectoryTaxonomyReader reader, int[] ords, FacetLabel[] paths) throws IOException {
+    // Asserts "symmetric" ordinals and paths: each ords[i] must resolve to paths[i] and each
+    // paths[i] back to ords[i]. Only valid for ords and paths that exist in the index.
+    assertGettingPaths(reader, paths, ords);
+    assertGettingOrdinals(reader, ords, paths);
+  }
+
  @Test
  public void testGetChildren() throws Exception {
    Directory dir = newDirectory();
@ -503,15 +586,15 @@ public class TestDirectoryTaxonomyReader extends FacetTestCase {

    // non existing category
    ChildrenIterator it = taxoReader.getChildren(taxoReader.getOrdinal(new FacetLabel("invalid")));
-    assertEquals(TaxonomyReader.INVALID_ORDINAL, it.next());
+    assertEquals(INVALID_ORDINAL, it.next());

    // a category with no children
    it = taxoReader.getChildren(taxoReader.getOrdinal(new FacetLabel("c")));
-    assertEquals(TaxonomyReader.INVALID_ORDINAL, it.next());
+    assertEquals(INVALID_ORDINAL, it.next());

    // arbitrary negative ordinal
    it = taxoReader.getChildren(-2);
-    assertEquals(TaxonomyReader.INVALID_ORDINAL, it.next());
+    assertEquals(INVALID_ORDINAL, it.next());

    // root's children
    Set<String> roots = new HashSet<>(Arrays.asList("a", "b", "c"));
@ -521,7 +604,7 @@ public class TestDirectoryTaxonomyReader extends FacetTestCase {
      assertEquals(1, root.length);
      assertTrue(roots.remove(root.components[0]));
    }
-    assertEquals(TaxonomyReader.INVALID_ORDINAL, it.next());
+    assertEquals(INVALID_ORDINAL, it.next());

    for (int i = 0; i < 2; i++) {
      FacetLabel cp = i == 0 ? new FacetLabel("a") : new FacetLabel("b");
@ -529,7 +612,7 @@ public class TestDirectoryTaxonomyReader extends FacetTestCase {
      it = taxoReader.getChildren(ordinal);
      int numChildren = 0;
      int child;
-      while ((child = it.next()) != TaxonomyReader.INVALID_ORDINAL) {
+      while ((child = it.next()) != INVALID_ORDINAL) {
        FacetLabel path = taxoReader.getPath(child);
        assertEquals(2, path.length);
        assertEquals(path.components[0], i == 0 ? "a" : "b");
@ -543,6 +626,7 @@ public class TestDirectoryTaxonomyReader extends FacetTestCase {
    dir.close();
  }

+  @Test
  public void testAccountable() throws Exception {
    Directory dir = newDirectory();
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(dir);
@ -570,16 +654,20 @@ public class TestDirectoryTaxonomyReader extends FacetTestCase {
    dir.close();
  }

-  public void testCallingBulkPathReturnsCorrectResult() throws Exception {
+  @Test
+  public void testGetPathAndOrdinalsRandomMultithreading() throws Exception {
    Directory src = newDirectory();
    DirectoryTaxonomyWriter w = new DirectoryTaxonomyWriter(src);
-    String randomArray[] = new String[RandomizedTest.randomIntBetween(1, 1000)];
+    final int maxNumberOfLabelsToIndex = 1000;
+    final int maxNumberOfUniqueLabelsToIndex = maxNumberOfLabelsToIndex / 2;
+    final int cacheSize = maxNumberOfUniqueLabelsToIndex / 2; // to cause some cache evictions
+    String randomArray[] = new String[RandomizedTest.randomIntBetween(1, maxNumberOfLabelsToIndex)];
    // adding a smaller bound on ints ensures that we will have some duplicate ordinals in random
    // test cases
-    Arrays.setAll(randomArray, i -> Integer.toString(random().nextInt(500)));
+    Arrays.setAll(
+        randomArray, i -> Integer.toString(random().nextInt(maxNumberOfUniqueLabelsToIndex)));

    FacetLabel allPaths[] = new FacetLabel[randomArray.length];
-    int allOrdinals[] = new int[randomArray.length];

    for (int i = 0; i < randomArray.length; i++) {
      allPaths[i] = new FacetLabel(randomArray[i]);
@ -593,53 +681,58 @@ public class TestDirectoryTaxonomyReader extends FacetTestCase {
    w.close();

    DirectoryTaxonomyReader r1 = new DirectoryTaxonomyReader(src);
+    r1.setCacheSize(cacheSize);

-    for (int i = 0; i < allPaths.length; i++) {
-      allOrdinals[i] = r1.getOrdinal(allPaths[i]);
-    }
+    int allOrdinals[] = r1.getBulkOrdinals(allPaths);

-    // create multiple threads to check result correctness and thread contention in the cache
-    Thread[] addThreads = new Thread[RandomNumbers.randomIntBetween(random(), 1, 12)];
-    for (int z = 0; z < addThreads.length; z++) {
-      addThreads[z] =
-          new Thread() {
-            @Override
-            public void run() {
-              // each thread iterates for numThreadIterations times
-              int numThreadIterations = random().nextInt(10);
-              for (int threadIterations = 0;
-                  threadIterations < numThreadIterations;
-                  threadIterations++) {
+    // Assert getPath and getBulkPath first, then assert getOrdinal and getBulkOrdinals.
+    // Create multiple threads to check result correctness and thread contention in the cache.
+    for (boolean assertGettingOrdinals : new boolean[] {false, true}) {
+      Thread[] addThreads = new Thread[RandomNumbers.randomIntBetween(random(), 1, 12)];
+      for (int z = 0; z < addThreads.length; z++) {
+        addThreads[z] =
+            new Thread() {
+              @Override
+              public void run() {
+                // each thread iterates for numThreadIterations times
+                int numThreadIterations = random().nextInt(10);
+                for (int threadIterations = 0;
+                    threadIterations < numThreadIterations;
+                    threadIterations++) {

-                // length of the FacetLabel array that we are going to check
-                int numOfOrdinalsToCheck = random().nextInt(allOrdinals.length);
-                int[] ordinals = new int[numOfOrdinalsToCheck];
-                FacetLabel[] path = new FacetLabel[numOfOrdinalsToCheck];
+                  // length of the FacetLabel array that we are going to check
+                  int numOfOrdinalsToCheck = random().nextInt(allOrdinals.length);
+                  int[] ordinals = new int[numOfOrdinalsToCheck];
+                  FacetLabel[] path = new FacetLabel[numOfOrdinalsToCheck];

-                for (int i = 0; i < numOfOrdinalsToCheck; i++) {
-                  // we deliberately allow it to choose repeat indexes as this will exercise the
-                  // cache
-                  int ordinalIndex = random().nextInt(allOrdinals.length);
-                  ordinals[i] = allOrdinals[ordinalIndex];
-                  path[i] = allPaths[ordinalIndex];
-                }
+                  for (int i = 0; i < numOfOrdinalsToCheck; i++) {
+                    // we deliberately allow it to choose repeat indexes as this will exercise the
+                    // cache
+                    int ordinalIndex = random().nextInt(allOrdinals.length);
+                    ordinals[i] = allOrdinals[ordinalIndex];
+                    path[i] = allPaths[ordinalIndex];
+                  }

-                try {
-                  // main check for correctness is done here
-                  assertArrayEquals(path, r1.getBulkPath(ordinals));
-                } catch (IOException e) {
-                  // this should ideally never occur, but if it does just rethrow the error to the
-                  // caller
-                  throw new RuntimeException(e);
+                  try {
+                    // main check for correctness is done here
+                    if (assertGettingOrdinals) {
+                      assertGettingOrdinals(r1, ordinals, path);
+                    } else {
+                      assertGettingPaths(r1, path, ordinals);
+                    }
+                  } catch (IOException e) {
+                    // this should ideally never occur, but if it does just rethrow the error to the
+                    // caller
+                    throw new RuntimeException(e);
+                  }
                }
              }
-            }
-          };
+            };
+      }
+      for (Thread t : addThreads) t.start();
+      for (Thread t : addThreads) t.join();
    }

-    for (Thread t : addThreads) t.start();
-    for (Thread t : addThreads) t.join();
-
    r1.close();
    src.close();
  }