mirror of https://github.com/apache/lucene.git
parent
570832eb74
commit
fb5f491643
|
@ -67,6 +67,9 @@ API Changes
|
|||
|
||||
* GITHUB#12735: Remove FSTCompiler#getTermCount() and FSTCompiler.UnCompiledNode#inputCount (Anh Dung Bui)
|
||||
|
||||
* GITHUB#12180: Add TaxonomyReader#getBulkOrdinals method to more efficiently retrieve facet ordinals for multiple
|
||||
FacetLabels at once. (Egor Potemkin)
|
||||
|
||||
New Features
|
||||
---------------------
|
||||
|
||||
|
|
|
@ -201,6 +201,28 @@ public abstract class TaxonomyReader implements Closeable {
|
|||
*/
|
||||
public abstract int getOrdinal(FacetLabel categoryPath) throws IOException;
|
||||
|
||||
/**
|
||||
* Returns the ordinals of the categories given as a path. The ordinal is the category's serial
|
||||
* number, an integer which starts with 0 and grows as more categories are added (note that once a
|
||||
* category is added, it can never be deleted).
|
||||
*
|
||||
* <p>The implementation in {@link
|
||||
* org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader} is generally faster than
|
||||
* iteratively calling {@link #getOrdinal(FacetLabel)}
|
||||
*
|
||||
* @return array of the category's' ordinals or {@link #INVALID_ORDINAL} if the category wasn't
|
||||
* found.
|
||||
*/
|
||||
public int[] getBulkOrdinals(FacetLabel... categoryPath) throws IOException {
|
||||
// This is a slow default implementation. DirectoryTaxonomyReader overrides this method to make
|
||||
// it faster.
|
||||
int[] ords = new int[categoryPath.length];
|
||||
for (int i = 0; i < categoryPath.length; i++) {
|
||||
ords[i] = getOrdinal(categoryPath[i]);
|
||||
}
|
||||
return ords;
|
||||
}
|
||||
|
||||
/** Returns ordinal for the dim + path. */
|
||||
public int getOrdinal(String dim, String... path) throws IOException {
|
||||
String[] fullPath = new String[path.length + 1];
|
||||
|
@ -218,6 +240,9 @@ public abstract class TaxonomyReader implements Closeable {
|
|||
* <p>The implementation in {@link
|
||||
* org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader} is generally faster than
|
||||
* the default implementation which iteratively calls {@link #getPath(int)}.
|
||||
*
|
||||
* <p>Note: this method may reorder the elements of its parameter; you should avoid reusing the
|
||||
* parameter after the method is called.
|
||||
*/
|
||||
public FacetLabel[] getBulkPath(int... ordinals) throws IOException {
|
||||
FacetLabel[] facetLabels = new FacetLabel[ordinals.length];
|
||||
|
|
|
@ -38,14 +38,19 @@ import org.apache.lucene.index.LeafReaderContext;
|
|||
import org.apache.lucene.index.MultiTerms;
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
import org.apache.lucene.index.ReaderUtil;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.Accountable;
|
||||
import org.apache.lucene.util.Accountables;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.apache.lucene.util.BytesRefComparator;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.InPlaceMergeSorter;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
import org.apache.lucene.util.StringSorter;
|
||||
|
||||
/**
|
||||
* A {@link TaxonomyReader} which retrieves stored taxonomy information from a {@link Directory}.
|
||||
|
@ -71,6 +76,11 @@ public class DirectoryTaxonomyReader extends TaxonomyReader implements Accountab
|
|||
private final long taxoEpoch; // used in doOpenIfChanged
|
||||
private final DirectoryReader indexReader;
|
||||
|
||||
// We only store the fact that a category exists, not otherwise.
|
||||
// This is required because the caches are shared with new DTR instances
|
||||
// that are allocated from doOpenIfChanged. Therefore, if we only store
|
||||
// information about found categories, we cannot accidentally tell a new
|
||||
// generation of DTR that a category does not exist.
|
||||
// TODO: test DoubleBarrelLRUCache and consider using it instead
|
||||
private LRUHashMap<FacetLabel, Integer> ordinalCache;
|
||||
private LRUHashMap<Integer, FacetLabel> categoryCache;
|
||||
|
@ -298,12 +308,6 @@ public class DirectoryTaxonomyReader extends TaxonomyReader implements Accountab
|
|||
0);
|
||||
if (docs != null && docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
ret = docs.docID();
|
||||
|
||||
// We only store the fact that a category exists, not otherwise.
|
||||
// This is required because the caches are shared with new DTR instances
|
||||
// that are allocated from doOpenIfChanged. Therefore, if we only store
|
||||
// information about found categories, we cannot accidentally tell a new
|
||||
// generation of DTR that a category does not exist.
|
||||
synchronized (ordinalCache) {
|
||||
ordinalCache.put(cp, ret);
|
||||
}
|
||||
|
@ -312,6 +316,117 @@ public class DirectoryTaxonomyReader extends TaxonomyReader implements Accountab
|
|||
return ret;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int[] getBulkOrdinals(FacetLabel... categoryPaths) throws IOException {
|
||||
ensureOpen();
|
||||
if (categoryPaths.length == 0) {
|
||||
return new int[0];
|
||||
}
|
||||
if (categoryPaths.length == 1) {
|
||||
return new int[] {getOrdinal(categoryPaths[0])};
|
||||
}
|
||||
// First try to find results in the cache:
|
||||
int[] result = new int[categoryPaths.length];
|
||||
int[] indexesMissingFromCache = new int[10]; // initial size, will grow when required
|
||||
int numberOfMissingFromCache = 0;
|
||||
FacetLabel cp;
|
||||
Integer res;
|
||||
for (int i = 0; i < categoryPaths.length; i++) {
|
||||
cp = categoryPaths[i];
|
||||
synchronized (ordinalCache) {
|
||||
res = ordinalCache.get(cp);
|
||||
}
|
||||
if (res != null) {
|
||||
if (res < indexReader.maxDoc()) {
|
||||
// Since the cache is shared with DTR instances allocated from
|
||||
// doOpenIfChanged, we need to ensure that the ordinal is one that
|
||||
// this DTR instance recognizes.
|
||||
result[i] = res;
|
||||
} else {
|
||||
// if we get here, it means that the category was found in the cache,
|
||||
// but is not recognized by this TR instance. Therefore, there's no
|
||||
// need to continue search for the path on disk, because we won't find
|
||||
// it there too.
|
||||
result[i] = TaxonomyReader.INVALID_ORDINAL;
|
||||
}
|
||||
} else {
|
||||
indexesMissingFromCache =
|
||||
ArrayUtil.grow(indexesMissingFromCache, numberOfMissingFromCache + 1);
|
||||
indexesMissingFromCache[numberOfMissingFromCache++] = i;
|
||||
}
|
||||
}
|
||||
// all ordinals found in cache
|
||||
if (indexesMissingFromCache.length == 0) {
|
||||
return result;
|
||||
}
|
||||
|
||||
// If we're still here, we have at least one cache miss. We need to fetch the
|
||||
// value from disk, and then also put results in the cache
|
||||
|
||||
// Create array of missing terms, and sort them so that later we scan terms dictionary
|
||||
// forward-only.
|
||||
// Note: similar functionality exists within BytesRefHash and BytesRefArray, but they don't
|
||||
// reuse BytesRefs and assign their own ords. It is cheaper to have custom implementation here.
|
||||
BytesRef[] termsToGet = new BytesRef[numberOfMissingFromCache];
|
||||
for (int i = 0; i < termsToGet.length; i++) {
|
||||
cp = categoryPaths[indexesMissingFromCache[i]];
|
||||
termsToGet[i] = new BytesRef(FacetsConfig.pathToString(cp.components, cp.length));
|
||||
}
|
||||
// sort both terms and their indexes in the input parameter
|
||||
int[] finalMissingFromCache = indexesMissingFromCache;
|
||||
|
||||
new StringSorter(BytesRefComparator.NATURAL) {
|
||||
|
||||
@Override
|
||||
protected void swap(int i, int j) {
|
||||
int tmp = finalMissingFromCache[i];
|
||||
finalMissingFromCache[i] = finalMissingFromCache[j];
|
||||
finalMissingFromCache[j] = tmp;
|
||||
BytesRef tmpBytes = termsToGet[i];
|
||||
termsToGet[i] = termsToGet[j];
|
||||
termsToGet[j] = tmpBytes;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void get(BytesRefBuilder builder, BytesRef result, int i) {
|
||||
BytesRef ref = termsToGet[i];
|
||||
result.offset = ref.offset;
|
||||
result.length = ref.length;
|
||||
result.bytes = ref.bytes;
|
||||
}
|
||||
}.sort(0, numberOfMissingFromCache);
|
||||
|
||||
TermsEnum te = MultiTerms.getTerms(indexReader, Consts.FULL).iterator();
|
||||
PostingsEnum postings = null;
|
||||
int ord;
|
||||
int resIndex;
|
||||
for (int i = 0; i < numberOfMissingFromCache; i++) {
|
||||
resIndex = indexesMissingFromCache[i];
|
||||
if (te.seekExact(termsToGet[i])) {
|
||||
postings = te.postings(postings, 0);
|
||||
if (postings != null && postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
ord = postings.docID();
|
||||
result[resIndex] = ord;
|
||||
} else {
|
||||
result[resIndex] = INVALID_ORDINAL;
|
||||
}
|
||||
} else {
|
||||
result[resIndex] = INVALID_ORDINAL;
|
||||
}
|
||||
}
|
||||
// populate cache
|
||||
synchronized (ordinalCache) {
|
||||
for (int i = 0; i < numberOfMissingFromCache; i++) {
|
||||
resIndex = indexesMissingFromCache[i];
|
||||
ord = result[resIndex];
|
||||
if (ord != INVALID_ORDINAL) {
|
||||
ordinalCache.put(categoryPaths[resIndex], ord);
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public FacetLabel getPath(int ordinal) throws IOException {
|
||||
ensureOpen();
|
||||
|
|
|
@ -16,13 +16,19 @@
|
|||
*/
|
||||
package org.apache.lucene.facet.taxonomy.directory;
|
||||
|
||||
import static org.apache.lucene.facet.taxonomy.TaxonomyReader.INVALID_ORDINAL;
|
||||
|
||||
import com.carrotsearch.randomizedtesting.RandomizedTest;
|
||||
import com.carrotsearch.randomizedtesting.generators.RandomNumbers;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Random;
|
||||
import java.util.Set;
|
||||
import java.util.stream.IntStream;
|
||||
import org.apache.lucene.facet.FacetTestCase;
|
||||
import org.apache.lucene.facet.taxonomy.FacetLabel;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
|
@ -42,6 +48,9 @@ import org.junit.Test;
|
|||
|
||||
public class TestDirectoryTaxonomyReader extends FacetTestCase {
|
||||
|
||||
private static FacetLabel ILLEGAL_PATH =
|
||||
new FacetLabel("PATH_THAT_CAUSED_IllegalArgumentException");
|
||||
|
||||
@Test
|
||||
public void testCloseAfterIncRef() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
|
@ -356,8 +365,7 @@ public class TestDirectoryTaxonomyReader extends FacetTestCase {
|
|||
|
||||
DirectoryTaxonomyReader r1 = new DirectoryTaxonomyReader(dir);
|
||||
// fill r1's caches
|
||||
assertEquals(1, r1.getOrdinal(cp_a));
|
||||
assertEquals(cp_a, r1.getPath(1));
|
||||
assertPathsAndOrdinals(r1, new int[] {1}, new FacetLabel[] {cp_a});
|
||||
|
||||
// now recreate, add a different category
|
||||
writer = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE);
|
||||
|
@ -369,16 +377,15 @@ public class TestDirectoryTaxonomyReader extends FacetTestCase {
|
|||
assertNotNull(r2);
|
||||
|
||||
// fill r2's caches
|
||||
assertEquals(1, r2.getOrdinal(cp_b));
|
||||
assertEquals(cp_b, r2.getPath(1));
|
||||
assertPathsAndOrdinals(r2, new int[] {1}, new FacetLabel[] {cp_b});
|
||||
|
||||
// check that r1 doesn't see cp_b
|
||||
assertEquals(TaxonomyReader.INVALID_ORDINAL, r1.getOrdinal(cp_b));
|
||||
assertEquals(cp_a, r1.getPath(1));
|
||||
assertGettingOrdinals(r1, new int[] {1, INVALID_ORDINAL}, new FacetLabel[] {cp_a, cp_b});
|
||||
assertGettingPaths(r1, new FacetLabel[] {cp_a, ILLEGAL_PATH}, new int[] {1, 2});
|
||||
|
||||
// check that r2 doesn't see cp_a
|
||||
assertEquals(TaxonomyReader.INVALID_ORDINAL, r2.getOrdinal(cp_a));
|
||||
assertEquals(cp_b, r2.getPath(1));
|
||||
assertGettingOrdinals(r2, new int[] {INVALID_ORDINAL, 1}, new FacetLabel[] {cp_a, cp_b});
|
||||
assertGettingPaths(r2, new FacetLabel[] {cp_b, ILLEGAL_PATH}, new int[] {1, 2});
|
||||
|
||||
r2.close();
|
||||
r1.close();
|
||||
|
@ -399,8 +406,7 @@ public class TestDirectoryTaxonomyReader extends FacetTestCase {
|
|||
DirectoryTaxonomyReader r1 =
|
||||
nrt ? new DirectoryTaxonomyReader(writer) : new DirectoryTaxonomyReader(dir);
|
||||
// fill r1's caches
|
||||
assertEquals(1, r1.getOrdinal(cp_a));
|
||||
assertEquals(cp_a, r1.getPath(1));
|
||||
assertPathsAndOrdinals(r1, new int[] {1}, new FacetLabel[] {cp_a});
|
||||
|
||||
FacetLabel cp_b = new FacetLabel("b");
|
||||
writer.addCategory(cp_b);
|
||||
|
@ -410,12 +416,11 @@ public class TestDirectoryTaxonomyReader extends FacetTestCase {
|
|||
assertNotNull(r2);
|
||||
|
||||
// add r2's categories to the caches
|
||||
assertEquals(2, r2.getOrdinal(cp_b));
|
||||
assertEquals(cp_b, r2.getPath(2));
|
||||
assertPathsAndOrdinals(r2, new int[] {1, 2}, new FacetLabel[] {cp_a, cp_b});
|
||||
|
||||
// check that r1 doesn't see cp_b
|
||||
assertEquals(TaxonomyReader.INVALID_ORDINAL, r1.getOrdinal(cp_b));
|
||||
expectThrows(IllegalArgumentException.class, () -> r1.getPath(2));
|
||||
assertGettingOrdinals(r1, new int[] {1, INVALID_ORDINAL}, new FacetLabel[] {cp_a, cp_b});
|
||||
assertGettingPaths(r1, new FacetLabel[] {cp_a, ILLEGAL_PATH}, new int[] {1, 2});
|
||||
|
||||
r1.close();
|
||||
r2.close();
|
||||
|
@ -445,8 +450,7 @@ public class TestDirectoryTaxonomyReader extends FacetTestCase {
|
|||
DirectoryTaxonomyReader r1 =
|
||||
nrt ? new DirectoryTaxonomyReader(writer) : new DirectoryTaxonomyReader(dir);
|
||||
// fill r1's caches
|
||||
assertEquals(1, r1.getOrdinal(cp_a));
|
||||
assertEquals(cp_a, r1.getPath(1));
|
||||
assertPathsAndOrdinals(r1, new int[] {1}, new FacetLabel[] {cp_a});
|
||||
|
||||
// now replace taxonomy
|
||||
writer.replaceTaxonomy(src);
|
||||
|
@ -456,16 +460,15 @@ public class TestDirectoryTaxonomyReader extends FacetTestCase {
|
|||
assertNotNull(r2);
|
||||
|
||||
// fill r2's caches
|
||||
assertEquals(1, r2.getOrdinal(cp_b));
|
||||
assertEquals(cp_b, r2.getPath(1));
|
||||
assertPathsAndOrdinals(r2, new int[] {1}, new FacetLabel[] {cp_b});
|
||||
|
||||
// check that r1 doesn't see cp_b
|
||||
assertEquals(TaxonomyReader.INVALID_ORDINAL, r1.getOrdinal(cp_b));
|
||||
assertEquals(cp_a, r1.getPath(1));
|
||||
assertGettingOrdinals(r1, new int[] {1, INVALID_ORDINAL}, new FacetLabel[] {cp_a, cp_b});
|
||||
assertGettingPaths(r1, new FacetLabel[] {cp_a, ILLEGAL_PATH}, new int[] {1, 2});
|
||||
|
||||
// check that r2 doesn't see cp_a
|
||||
assertEquals(TaxonomyReader.INVALID_ORDINAL, r2.getOrdinal(cp_a));
|
||||
assertEquals(cp_b, r2.getPath(1));
|
||||
assertGettingOrdinals(r2, new int[] {INVALID_ORDINAL, 1}, new FacetLabel[] {cp_a, cp_b});
|
||||
assertGettingPaths(r2, new FacetLabel[] {cp_b, ILLEGAL_PATH}, new int[] {1, 2});
|
||||
|
||||
r2.close();
|
||||
r1.close();
|
||||
|
@ -476,6 +479,86 @@ public class TestDirectoryTaxonomyReader extends FacetTestCase {
|
|||
src.close();
|
||||
}
|
||||
|
||||
private void assertGettingOrdinals(
|
||||
DirectoryTaxonomyReader reader, int[] expectedOrds, FacetLabel[] sourcePaths)
|
||||
throws IOException {
|
||||
// To exercise mix of cache hit and cache misses for getOrdinal and getBulkOrdinals this method:
|
||||
// 1. Randomly gets a few ords using sequential calls.
|
||||
// 2. Call bulk get method.
|
||||
// 3. Continue sequential calls for the remaining items.
|
||||
assertEquals(expectedOrds.length, sourcePaths.length);
|
||||
int bulkOperationsIteration = random().nextInt(sourcePaths.length);
|
||||
List<Integer> indexesShuffled =
|
||||
new ArrayList<>(IntStream.range(0, sourcePaths.length).boxed().toList());
|
||||
Collections.shuffle(indexesShuffled, random());
|
||||
|
||||
for (int i = 0; i < bulkOperationsIteration; i++) {
|
||||
int nextIndex = indexesShuffled.get(i);
|
||||
assertEquals(expectedOrds[nextIndex], reader.getOrdinal(sourcePaths[nextIndex]));
|
||||
}
|
||||
|
||||
int[] bulkOrdResult = reader.getBulkOrdinals(sourcePaths);
|
||||
assertArrayEquals(expectedOrds, bulkOrdResult);
|
||||
|
||||
for (int i = bulkOperationsIteration; i < sourcePaths.length; i++) {
|
||||
int nextIndex = indexesShuffled.get(i);
|
||||
assertEquals(expectedOrds[nextIndex], reader.getOrdinal(sourcePaths[nextIndex]));
|
||||
}
|
||||
}
|
||||
|
||||
private void assertGettingPaths(
|
||||
DirectoryTaxonomyReader reader, FacetLabel[] expectedPaths, int[] sourceOrds)
|
||||
throws IOException {
|
||||
// To exercise mix of cache hit and cache misses for getPath and getBulkPath this method:
|
||||
// 1. Randomly gets a few paths using sequential calls.
|
||||
// 2. Call bulk get method.
|
||||
// 3. Continue sequential calls for the remaining items.
|
||||
// Note: expectedPaths should refer to ILLEGAL_PATH for ords from sourceOrds that are expected
|
||||
// to throw IllegalArgumentException
|
||||
assertEquals(expectedPaths.length, sourceOrds.length);
|
||||
int bulkOperationsIteration = random().nextInt(sourceOrds.length);
|
||||
List<Integer> indexesShuffled =
|
||||
new ArrayList<>(IntStream.range(0, sourceOrds.length).boxed().toList());
|
||||
Collections.shuffle(indexesShuffled, random());
|
||||
|
||||
boolean illegalPathExceptionIsExpected =
|
||||
Arrays.stream(expectedPaths).anyMatch(x -> x == ILLEGAL_PATH);
|
||||
for (int i = 0; i < bulkOperationsIteration; i++) {
|
||||
int nextIndex = indexesShuffled.get(i);
|
||||
if (expectedPaths[nextIndex] == ILLEGAL_PATH) {
|
||||
expectThrows(IllegalArgumentException.class, () -> reader.getPath(sourceOrds[nextIndex]));
|
||||
} else {
|
||||
assertEquals(expectedPaths[nextIndex], reader.getPath(sourceOrds[nextIndex]));
|
||||
}
|
||||
}
|
||||
|
||||
if (illegalPathExceptionIsExpected) {
|
||||
expectThrows(IllegalArgumentException.class, () -> reader.getBulkPath(sourceOrds));
|
||||
} else {
|
||||
// clone because getBulkPath changes order of param's elements
|
||||
int[] sourceOrdsCopy = sourceOrds.clone();
|
||||
FacetLabel[] bulkPathsResult = reader.getBulkPath(sourceOrdsCopy);
|
||||
assertArrayEquals(expectedPaths, bulkPathsResult);
|
||||
}
|
||||
|
||||
for (int i = bulkOperationsIteration; i < sourceOrds.length; i++) {
|
||||
int nextIndex = indexesShuffled.get(i);
|
||||
if (expectedPaths[nextIndex] == ILLEGAL_PATH) {
|
||||
expectThrows(IllegalArgumentException.class, () -> reader.getPath(sourceOrds[nextIndex]));
|
||||
} else {
|
||||
assertEquals(expectedPaths[nextIndex], reader.getPath(sourceOrds[nextIndex]));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void assertPathsAndOrdinals(
|
||||
DirectoryTaxonomyReader reader, int[] ords, FacetLabel[] paths) throws IOException {
|
||||
// use this method to assert "symmetric" ordinals and paths: when source ords and paths match
|
||||
// expected ords and paths. This works for valid ords and paths that exist in the index.
|
||||
assertGettingPaths(reader, paths, ords);
|
||||
assertGettingOrdinals(reader, ords, paths);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetChildren() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
|
@ -503,15 +586,15 @@ public class TestDirectoryTaxonomyReader extends FacetTestCase {
|
|||
|
||||
// non existing category
|
||||
ChildrenIterator it = taxoReader.getChildren(taxoReader.getOrdinal(new FacetLabel("invalid")));
|
||||
assertEquals(TaxonomyReader.INVALID_ORDINAL, it.next());
|
||||
assertEquals(INVALID_ORDINAL, it.next());
|
||||
|
||||
// a category with no children
|
||||
it = taxoReader.getChildren(taxoReader.getOrdinal(new FacetLabel("c")));
|
||||
assertEquals(TaxonomyReader.INVALID_ORDINAL, it.next());
|
||||
assertEquals(INVALID_ORDINAL, it.next());
|
||||
|
||||
// arbitrary negative ordinal
|
||||
it = taxoReader.getChildren(-2);
|
||||
assertEquals(TaxonomyReader.INVALID_ORDINAL, it.next());
|
||||
assertEquals(INVALID_ORDINAL, it.next());
|
||||
|
||||
// root's children
|
||||
Set<String> roots = new HashSet<>(Arrays.asList("a", "b", "c"));
|
||||
|
@ -521,7 +604,7 @@ public class TestDirectoryTaxonomyReader extends FacetTestCase {
|
|||
assertEquals(1, root.length);
|
||||
assertTrue(roots.remove(root.components[0]));
|
||||
}
|
||||
assertEquals(TaxonomyReader.INVALID_ORDINAL, it.next());
|
||||
assertEquals(INVALID_ORDINAL, it.next());
|
||||
|
||||
for (int i = 0; i < 2; i++) {
|
||||
FacetLabel cp = i == 0 ? new FacetLabel("a") : new FacetLabel("b");
|
||||
|
@ -529,7 +612,7 @@ public class TestDirectoryTaxonomyReader extends FacetTestCase {
|
|||
it = taxoReader.getChildren(ordinal);
|
||||
int numChildren = 0;
|
||||
int child;
|
||||
while ((child = it.next()) != TaxonomyReader.INVALID_ORDINAL) {
|
||||
while ((child = it.next()) != INVALID_ORDINAL) {
|
||||
FacetLabel path = taxoReader.getPath(child);
|
||||
assertEquals(2, path.length);
|
||||
assertEquals(path.components[0], i == 0 ? "a" : "b");
|
||||
|
@ -543,6 +626,7 @@ public class TestDirectoryTaxonomyReader extends FacetTestCase {
|
|||
dir.close();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAccountable() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(dir);
|
||||
|
@ -570,16 +654,20 @@ public class TestDirectoryTaxonomyReader extends FacetTestCase {
|
|||
dir.close();
|
||||
}
|
||||
|
||||
public void testCallingBulkPathReturnsCorrectResult() throws Exception {
|
||||
@Test
|
||||
public void testGetPathAndOrdinalsRandomMultithreading() throws Exception {
|
||||
Directory src = newDirectory();
|
||||
DirectoryTaxonomyWriter w = new DirectoryTaxonomyWriter(src);
|
||||
String randomArray[] = new String[RandomizedTest.randomIntBetween(1, 1000)];
|
||||
final int maxNumberOfLabelsToIndex = 1000;
|
||||
final int maxNumberOfUniqueLabelsToIndex = maxNumberOfLabelsToIndex / 2;
|
||||
final int cacheSize = maxNumberOfUniqueLabelsToIndex / 2; // to cause some cache evictions
|
||||
String randomArray[] = new String[RandomizedTest.randomIntBetween(1, maxNumberOfLabelsToIndex)];
|
||||
// adding a smaller bound on ints ensures that we will have some duplicate ordinals in random
|
||||
// test cases
|
||||
Arrays.setAll(randomArray, i -> Integer.toString(random().nextInt(500)));
|
||||
Arrays.setAll(
|
||||
randomArray, i -> Integer.toString(random().nextInt(maxNumberOfUniqueLabelsToIndex)));
|
||||
|
||||
FacetLabel allPaths[] = new FacetLabel[randomArray.length];
|
||||
int allOrdinals[] = new int[randomArray.length];
|
||||
|
||||
for (int i = 0; i < randomArray.length; i++) {
|
||||
allPaths[i] = new FacetLabel(randomArray[i]);
|
||||
|
@ -593,53 +681,58 @@ public class TestDirectoryTaxonomyReader extends FacetTestCase {
|
|||
w.close();
|
||||
|
||||
DirectoryTaxonomyReader r1 = new DirectoryTaxonomyReader(src);
|
||||
r1.setCacheSize(cacheSize);
|
||||
|
||||
for (int i = 0; i < allPaths.length; i++) {
|
||||
allOrdinals[i] = r1.getOrdinal(allPaths[i]);
|
||||
}
|
||||
int allOrdinals[] = r1.getBulkOrdinals(allPaths);
|
||||
|
||||
// create multiple threads to check result correctness and thread contention in the cache
|
||||
Thread[] addThreads = new Thread[RandomNumbers.randomIntBetween(random(), 1, 12)];
|
||||
for (int z = 0; z < addThreads.length; z++) {
|
||||
addThreads[z] =
|
||||
new Thread() {
|
||||
@Override
|
||||
public void run() {
|
||||
// each thread iterates for numThreadIterations times
|
||||
int numThreadIterations = random().nextInt(10);
|
||||
for (int threadIterations = 0;
|
||||
threadIterations < numThreadIterations;
|
||||
threadIterations++) {
|
||||
// Assert getPath and getBulkPath first, then assert getOrdinal and getBulkOrdinals.
|
||||
// Create multiple threads to check result correctness and thread contention in the cache.
|
||||
for (boolean assertGettingOrdinals : new boolean[] {false, true}) {
|
||||
Thread[] addThreads = new Thread[RandomNumbers.randomIntBetween(random(), 1, 12)];
|
||||
for (int z = 0; z < addThreads.length; z++) {
|
||||
addThreads[z] =
|
||||
new Thread() {
|
||||
@Override
|
||||
public void run() {
|
||||
// each thread iterates for numThreadIterations times
|
||||
int numThreadIterations = random().nextInt(10);
|
||||
for (int threadIterations = 0;
|
||||
threadIterations < numThreadIterations;
|
||||
threadIterations++) {
|
||||
|
||||
// length of the FacetLabel array that we are going to check
|
||||
int numOfOrdinalsToCheck = random().nextInt(allOrdinals.length);
|
||||
int[] ordinals = new int[numOfOrdinalsToCheck];
|
||||
FacetLabel[] path = new FacetLabel[numOfOrdinalsToCheck];
|
||||
// length of the FacetLabel array that we are going to check
|
||||
int numOfOrdinalsToCheck = random().nextInt(allOrdinals.length);
|
||||
int[] ordinals = new int[numOfOrdinalsToCheck];
|
||||
FacetLabel[] path = new FacetLabel[numOfOrdinalsToCheck];
|
||||
|
||||
for (int i = 0; i < numOfOrdinalsToCheck; i++) {
|
||||
// we deliberately allow it to choose repeat indexes as this will exercise the
|
||||
// cache
|
||||
int ordinalIndex = random().nextInt(allOrdinals.length);
|
||||
ordinals[i] = allOrdinals[ordinalIndex];
|
||||
path[i] = allPaths[ordinalIndex];
|
||||
}
|
||||
for (int i = 0; i < numOfOrdinalsToCheck; i++) {
|
||||
// we deliberately allow it to choose repeat indexes as this will exercise the
|
||||
// cache
|
||||
int ordinalIndex = random().nextInt(allOrdinals.length);
|
||||
ordinals[i] = allOrdinals[ordinalIndex];
|
||||
path[i] = allPaths[ordinalIndex];
|
||||
}
|
||||
|
||||
try {
|
||||
// main check for correctness is done here
|
||||
assertArrayEquals(path, r1.getBulkPath(ordinals));
|
||||
} catch (IOException e) {
|
||||
// this should ideally never occur, but if it does just rethrow the error to the
|
||||
// caller
|
||||
throw new RuntimeException(e);
|
||||
try {
|
||||
// main check for correctness is done here
|
||||
if (assertGettingOrdinals) {
|
||||
assertGettingOrdinals(r1, ordinals, path);
|
||||
} else {
|
||||
assertGettingPaths(r1, path, ordinals);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
// this should ideally never occur, but if it does just rethrow the error to the
|
||||
// caller
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
};
|
||||
}
|
||||
for (Thread t : addThreads) t.start();
|
||||
for (Thread t : addThreads) t.join();
|
||||
}
|
||||
|
||||
for (Thread t : addThreads) t.start();
|
||||
for (Thread t : addThreads) t.join();
|
||||
|
||||
r1.close();
|
||||
src.close();
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue