mirror of https://github.com/apache/lucene.git
Split taxonomy arrays across chunks (#12995)
Split taxonomy arrays across chunks. Taxonomy ordinals are added in an append-only way. Instead of reallocating a single big array when loading new taxonomy ordinals and copying all the values from the previous arrays over individually, we can keep blocks of ordinals and reuse blocks from the previous arrays.
This commit is contained in:
parent
24d557a4f6
commit
2a0b7f2056
|
@ -214,6 +214,8 @@ Optimizations
|
|||
|
||||
* GITHUB#12997: Avoid resetting BlockDocsEnum#freqBuffer when indexHasFreq is false. (Zhang Chao, Adrien Grand)
|
||||
|
||||
* GITHUB#12989: Split taxonomy facet arrays across reusable chunks of elements to reduce allocations. (Michael Froh, Stefan Vodita)
|
||||
|
||||
Bug Fixes
|
||||
---------------------
|
||||
* GITHUB#12866: Prevent extra similarity computation for single-level HNSW graphs. (Kaival Parikh)
|
||||
|
|
|
@ -73,7 +73,7 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets {
|
|||
}
|
||||
|
||||
// Rollup any necessary dims:
|
||||
int[] children = getChildren();
|
||||
ParallelTaxonomyArrays.IntArray children = getChildren();
|
||||
for (Map.Entry<String, DimConfig> ent : config.getDimConfigs().entrySet()) {
|
||||
String dim = ent.getKey();
|
||||
DimConfig ft = ent.getValue();
|
||||
|
@ -81,21 +81,21 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets {
|
|||
int dimRootOrd = taxoReader.getOrdinal(new FacetLabel(dim));
|
||||
assert dimRootOrd > 0;
|
||||
float newValue =
|
||||
aggregationFunction.aggregate(values[dimRootOrd], rollup(children[dimRootOrd]));
|
||||
aggregationFunction.aggregate(values[dimRootOrd], rollup(children.get(dimRootOrd)));
|
||||
values[dimRootOrd] = newValue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private float rollup(int ord) throws IOException {
|
||||
int[] children = getChildren();
|
||||
int[] siblings = getSiblings();
|
||||
ParallelTaxonomyArrays.IntArray children = getChildren();
|
||||
ParallelTaxonomyArrays.IntArray siblings = getSiblings();
|
||||
float aggregationValue = 0f;
|
||||
while (ord != TaxonomyReader.INVALID_ORDINAL) {
|
||||
float childValue = aggregationFunction.aggregate(values[ord], rollup(children[ord]));
|
||||
float childValue = aggregationFunction.aggregate(values[ord], rollup(children.get(ord)));
|
||||
values[ord] = childValue;
|
||||
aggregationValue = aggregationFunction.aggregate(aggregationValue, childValue);
|
||||
ord = siblings[ord];
|
||||
ord = siblings.get(ord);
|
||||
}
|
||||
return aggregationValue;
|
||||
}
|
||||
|
@ -133,10 +133,10 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets {
|
|||
return null;
|
||||
}
|
||||
|
||||
int[] children = getChildren();
|
||||
int[] siblings = getSiblings();
|
||||
ParallelTaxonomyArrays.IntArray children = getChildren();
|
||||
ParallelTaxonomyArrays.IntArray siblings = getSiblings();
|
||||
|
||||
int ord = children[dimOrd];
|
||||
int ord = children.get(dimOrd);
|
||||
float aggregatedValue = 0;
|
||||
|
||||
IntArrayList ordinals = new IntArrayList();
|
||||
|
@ -148,7 +148,7 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets {
|
|||
ordinals.add(ord);
|
||||
ordValues.add(values[ord]);
|
||||
}
|
||||
ord = siblings[ord];
|
||||
ord = siblings.get(ord);
|
||||
}
|
||||
|
||||
if (aggregatedValue == 0) {
|
||||
|
@ -206,10 +206,10 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets {
|
|||
float bottomValue = 0;
|
||||
int bottomOrd = Integer.MAX_VALUE;
|
||||
|
||||
int[] children = getChildren();
|
||||
int[] siblings = getSiblings();
|
||||
ParallelTaxonomyArrays.IntArray children = getChildren();
|
||||
ParallelTaxonomyArrays.IntArray siblings = getSiblings();
|
||||
|
||||
int ord = children[pathOrd];
|
||||
int ord = children.get(pathOrd);
|
||||
float aggregatedValue = 0;
|
||||
int childCount = 0;
|
||||
|
||||
|
@ -233,7 +233,7 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets {
|
|||
}
|
||||
}
|
||||
|
||||
ord = siblings[ord];
|
||||
ord = siblings.get(ord);
|
||||
}
|
||||
|
||||
if (dimConfig.multiValued) {
|
||||
|
@ -294,8 +294,8 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets {
|
|||
}
|
||||
|
||||
// get existing children and siblings ordinal array from TaxonomyFacets
|
||||
int[] children = getChildren();
|
||||
int[] siblings = getSiblings();
|
||||
ParallelTaxonomyArrays.IntArray children = getChildren();
|
||||
ParallelTaxonomyArrays.IntArray siblings = getSiblings();
|
||||
|
||||
// Create priority queue to store top dimensions and sort by their aggregated values/hits and
|
||||
// string values.
|
||||
|
@ -317,7 +317,7 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets {
|
|||
Map<String, TopChildrenForPath> intermediateResults = null;
|
||||
|
||||
// iterate over children and siblings ordinals for all dims
|
||||
int ord = children[TaxonomyReader.ROOT_ORDINAL];
|
||||
int ord = children.get(TaxonomyReader.ROOT_ORDINAL);
|
||||
while (ord != TaxonomyReader.INVALID_ORDINAL) {
|
||||
String dim = taxoReader.getPath(ord).components[0];
|
||||
FacetsConfig.DimConfig dimConfig = config.getDimConfig(dim);
|
||||
|
@ -364,7 +364,7 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets {
|
|||
}
|
||||
}
|
||||
}
|
||||
ord = siblings[ord];
|
||||
ord = siblings.get(ord);
|
||||
}
|
||||
|
||||
FacetResult[] results = new FacetResult[pq.size()];
|
||||
|
|
|
@ -104,7 +104,7 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
|
|||
}
|
||||
|
||||
// Rollup any necessary dims:
|
||||
int[] children = null;
|
||||
ParallelTaxonomyArrays.IntArray children = null;
|
||||
for (Map.Entry<String, DimConfig> ent : config.getDimConfigs().entrySet()) {
|
||||
String dim = ent.getKey();
|
||||
DimConfig ft = ent.getValue();
|
||||
|
@ -118,7 +118,8 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
|
|||
children = getChildren();
|
||||
}
|
||||
int currentValue = getValue(dimRootOrd);
|
||||
int newValue = aggregationFunction.aggregate(currentValue, rollup(children[dimRootOrd]));
|
||||
int newValue =
|
||||
aggregationFunction.aggregate(currentValue, rollup(children.get(dimRootOrd)));
|
||||
setValue(dimRootOrd, newValue);
|
||||
}
|
||||
}
|
||||
|
@ -126,15 +127,15 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
|
|||
}
|
||||
|
||||
private int rollup(int ord) throws IOException {
|
||||
int[] children = getChildren();
|
||||
int[] siblings = getSiblings();
|
||||
ParallelTaxonomyArrays.IntArray children = getChildren();
|
||||
ParallelTaxonomyArrays.IntArray siblings = getSiblings();
|
||||
int aggregatedValue = 0;
|
||||
while (ord != TaxonomyReader.INVALID_ORDINAL) {
|
||||
int currentValue = getValue(ord);
|
||||
int newValue = aggregationFunction.aggregate(currentValue, rollup(children[ord]));
|
||||
int newValue = aggregationFunction.aggregate(currentValue, rollup(children.get(ord)));
|
||||
setValue(ord, newValue);
|
||||
aggregatedValue = aggregationFunction.aggregate(aggregatedValue, getValue(ord));
|
||||
ord = siblings[ord];
|
||||
ord = siblings.get(ord);
|
||||
}
|
||||
return aggregatedValue;
|
||||
}
|
||||
|
@ -204,16 +205,16 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
|
|||
for (IntIntCursor c : sparseValues) {
|
||||
int value = c.value;
|
||||
int ord = c.key;
|
||||
if (parents[ord] == dimOrd && value > 0) {
|
||||
if (parents.get(ord) == dimOrd && value > 0) {
|
||||
aggregatedValue = aggregationFunction.aggregate(aggregatedValue, value);
|
||||
ordinals.add(ord);
|
||||
ordValues.add(value);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
int[] children = getChildren();
|
||||
int[] siblings = getSiblings();
|
||||
int ord = children[dimOrd];
|
||||
ParallelTaxonomyArrays.IntArray children = getChildren();
|
||||
ParallelTaxonomyArrays.IntArray siblings = getSiblings();
|
||||
int ord = children.get(dimOrd);
|
||||
while (ord != TaxonomyReader.INVALID_ORDINAL) {
|
||||
int value = values[ord];
|
||||
if (value > 0) {
|
||||
|
@ -221,7 +222,7 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
|
|||
ordinals.add(ord);
|
||||
ordValues.add(value);
|
||||
}
|
||||
ord = siblings[ord];
|
||||
ord = siblings.get(ord);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -289,7 +290,7 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
|
|||
for (IntIntCursor c : sparseValues) {
|
||||
int value = c.value;
|
||||
int ord = c.key;
|
||||
if (parents[ord] == pathOrd && value > 0) {
|
||||
if (parents.get(ord) == pathOrd && value > 0) {
|
||||
aggregatedValue = aggregationFunction.aggregate(aggregatedValue, value);
|
||||
childCount++;
|
||||
if (value > bottomValue || (value == bottomValue && ord < bottomOrd)) {
|
||||
|
@ -307,9 +308,9 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
|
|||
}
|
||||
}
|
||||
} else {
|
||||
int[] children = getChildren();
|
||||
int[] siblings = getSiblings();
|
||||
int ord = children[pathOrd];
|
||||
ParallelTaxonomyArrays.IntArray children = getChildren();
|
||||
ParallelTaxonomyArrays.IntArray siblings = getSiblings();
|
||||
int ord = children.get(pathOrd);
|
||||
while (ord != TaxonomyReader.INVALID_ORDINAL) {
|
||||
int value = values[ord];
|
||||
if (value > 0) {
|
||||
|
@ -328,7 +329,7 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
|
|||
}
|
||||
}
|
||||
}
|
||||
ord = siblings[ord];
|
||||
ord = siblings.get(ord);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -355,8 +356,8 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
|
|||
}
|
||||
|
||||
// get children and siblings ordinal array from TaxonomyFacets
|
||||
int[] children = getChildren();
|
||||
int[] siblings = getSiblings();
|
||||
ParallelTaxonomyArrays.IntArray children = getChildren();
|
||||
ParallelTaxonomyArrays.IntArray siblings = getSiblings();
|
||||
|
||||
// Create priority queue to store top dimensions and sort by their aggregated values/hits and
|
||||
// string values.
|
||||
|
@ -378,7 +379,7 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
|
|||
Map<String, TopChildrenForPath> intermediateResults = null;
|
||||
|
||||
// iterate over children and siblings ordinals for all dims
|
||||
int ord = children[TaxonomyReader.ROOT_ORDINAL];
|
||||
int ord = children.get(TaxonomyReader.ROOT_ORDINAL);
|
||||
while (ord != TaxonomyReader.INVALID_ORDINAL) {
|
||||
String dim = taxoReader.getPath(ord).components[0];
|
||||
FacetsConfig.DimConfig dimConfig = config.getDimConfig(dim);
|
||||
|
@ -425,7 +426,7 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
|
|||
}
|
||||
}
|
||||
}
|
||||
ord = siblings[ord];
|
||||
ord = siblings.get(ord);
|
||||
}
|
||||
|
||||
FacetResult[] results = new FacetResult[pq.size()];
|
||||
|
|
|
@ -35,6 +35,26 @@ package org.apache.lucene.facet.taxonomy;
|
|||
* @lucene.experimental
|
||||
*/
|
||||
public abstract class ParallelTaxonomyArrays {
|
||||
/** Abstraction that looks like an int[], but read-only. */
|
||||
public abstract static class IntArray {
|
||||
/** Sole constructor. */
|
||||
public IntArray() {}
|
||||
|
||||
/**
|
||||
* Equivalent to array[i].
|
||||
*
|
||||
* @param i the index of the value to retrieve
|
||||
* @return the value at position i
|
||||
*/
|
||||
public abstract int get(int i);
|
||||
|
||||
/**
|
||||
* Equivalent to array.length.
|
||||
*
|
||||
* @return the allocated size of the array
|
||||
*/
|
||||
public abstract int length();
|
||||
}
|
||||
|
||||
/** Sole constructor. */
|
||||
public ParallelTaxonomyArrays() {}
|
||||
|
@ -43,17 +63,17 @@ public abstract class ParallelTaxonomyArrays {
|
|||
* Returns the parents array, where {@code parents[i]} denotes the parent of category ordinal
|
||||
* {@code i}.
|
||||
*/
|
||||
public abstract int[] parents();
|
||||
public abstract IntArray parents();
|
||||
|
||||
/**
|
||||
* Returns the children array, where {@code children[i]} denotes a child of category ordinal
|
||||
* {@code i}.
|
||||
*/
|
||||
public abstract int[] children();
|
||||
public abstract IntArray children();
|
||||
|
||||
/**
|
||||
* Returns the siblings array, where {@code siblings[i]} denotes the sibling of category ordinal
|
||||
* {@code i}.
|
||||
*/
|
||||
public abstract int[] siblings();
|
||||
public abstract IntArray siblings();
|
||||
}
|
||||
|
|
|
@ -84,7 +84,7 @@ public class TaxonomyFacetLabels {
|
|||
private int currentDocOrdinalCount;
|
||||
|
||||
// Lazily set when nextFacetLabel(int docId, String facetDimension) is first called
|
||||
private int[] parents;
|
||||
private ParallelTaxonomyArrays.IntArray parents;
|
||||
|
||||
/** Construct from a specified {@link SortedNumericDocValues} field. */
|
||||
public FacetLabelReader(SortedNumericDocValues ordinalValues) {
|
||||
|
@ -141,10 +141,10 @@ public class TaxonomyFacetLabels {
|
|||
|
||||
private boolean isDescendant(int ord, int ancestorOrd) {
|
||||
while (ord != INVALID_ORDINAL && ord != ROOT_ORDINAL) {
|
||||
if (parents[ord] == ancestorOrd) {
|
||||
if (parents.get(ord) == ancestorOrd) {
|
||||
return true;
|
||||
}
|
||||
ord = parents[ord];
|
||||
ord = parents.get(ord);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -59,13 +59,13 @@ abstract class TaxonomyFacets extends Facets {
|
|||
final FacetsCollector fc;
|
||||
|
||||
/** Maps parent ordinal to its child, or -1 if the parent is childless. */
|
||||
private int[] children;
|
||||
private ParallelTaxonomyArrays.IntArray children;
|
||||
|
||||
/** Maps an ordinal to its sibling, or -1 if there is no sibling. */
|
||||
private int[] siblings;
|
||||
private ParallelTaxonomyArrays.IntArray siblings;
|
||||
|
||||
/** Maps an ordinal to its parent, or -1 if there is no parent (root node). */
|
||||
final int[] parents;
|
||||
final ParallelTaxonomyArrays.IntArray parents;
|
||||
|
||||
/** Sole constructor. */
|
||||
TaxonomyFacets(
|
||||
|
@ -82,7 +82,7 @@ abstract class TaxonomyFacets extends Facets {
|
|||
* Returns an IntArray mapping each ordinal to its first child; this is a large array and is computed
|
||||
* (and then saved) the first time this method is invoked.
|
||||
*/
|
||||
int[] getChildren() throws IOException {
|
||||
ParallelTaxonomyArrays.IntArray getChildren() throws IOException {
|
||||
if (children == null) {
|
||||
children = taxoReader.getParallelTaxonomyArrays().children();
|
||||
}
|
||||
|
@ -93,7 +93,7 @@ abstract class TaxonomyFacets extends Facets {
|
|||
* Returns an IntArray mapping each ordinal to its next sibling; this is a large array and is computed
|
||||
* (and then saved) the first time this method is invoked.
|
||||
*/
|
||||
int[] getSiblings() throws IOException {
|
||||
ParallelTaxonomyArrays.IntArray getSiblings() throws IOException {
|
||||
if (siblings == null) {
|
||||
siblings = taxoReader.getParallelTaxonomyArrays().siblings();
|
||||
}
|
||||
|
@ -150,9 +150,9 @@ abstract class TaxonomyFacets extends Facets {
|
|||
return Collections.emptyList();
|
||||
}
|
||||
|
||||
int[] children = getChildren();
|
||||
int[] siblings = getSiblings();
|
||||
int ord = children[TaxonomyReader.ROOT_ORDINAL];
|
||||
ParallelTaxonomyArrays.IntArray children = getChildren();
|
||||
ParallelTaxonomyArrays.IntArray siblings = getSiblings();
|
||||
int ord = children.get(TaxonomyReader.ROOT_ORDINAL);
|
||||
List<FacetResult> results = new ArrayList<>();
|
||||
while (ord != TaxonomyReader.INVALID_ORDINAL) {
|
||||
String dim = taxoReader.getPath(ord).components[0];
|
||||
|
@ -163,7 +163,7 @@ abstract class TaxonomyFacets extends Facets {
|
|||
results.add(result);
|
||||
}
|
||||
}
|
||||
ord = siblings[ord];
|
||||
ord = siblings.get(ord);
|
||||
}
|
||||
|
||||
// Sort by highest value, tie break by dim:
|
||||
|
|
|
@ -61,10 +61,10 @@ public abstract class TaxonomyReader implements Closeable {
|
|||
/** An iterator over a category's children. */
|
||||
public static class ChildrenIterator {
|
||||
|
||||
private final int[] siblings;
|
||||
private final ParallelTaxonomyArrays.IntArray siblings;
|
||||
private int child;
|
||||
|
||||
ChildrenIterator(int child, int[] siblings) {
|
||||
ChildrenIterator(int child, ParallelTaxonomyArrays.IntArray siblings) {
|
||||
this.siblings = siblings;
|
||||
this.child = child;
|
||||
}
|
||||
|
@ -75,7 +75,7 @@ public abstract class TaxonomyReader implements Closeable {
|
|||
public int next() {
|
||||
int res = child;
|
||||
if (child != TaxonomyReader.INVALID_ORDINAL) {
|
||||
child = siblings[child];
|
||||
child = siblings.get(child);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
@ -181,7 +181,7 @@ public abstract class TaxonomyReader implements Closeable {
|
|||
/** Returns an iterator over the children of the given ordinal. */
|
||||
public ChildrenIterator getChildren(final int ordinal) throws IOException {
|
||||
ParallelTaxonomyArrays arrays = getParallelTaxonomyArrays();
|
||||
int child = ordinal >= 0 ? arrays.children()[ordinal] : INVALID_ORDINAL;
|
||||
int child = ordinal >= 0 ? arrays.children().get(ordinal) : INVALID_ORDINAL;
|
||||
return new ChildrenIterator(child, arrays.siblings());
|
||||
}
|
||||
|
||||
|
|
|
@ -34,6 +34,7 @@ import org.apache.lucene.document.NumericDocValuesField;
|
|||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.facet.FacetsConfig;
|
||||
import org.apache.lucene.facet.taxonomy.FacetLabel;
|
||||
import org.apache.lucene.facet.taxonomy.ParallelTaxonomyArrays;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
|
||||
import org.apache.lucene.facet.taxonomy.writercache.LruTaxonomyWriterCache;
|
||||
|
@ -678,10 +679,10 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
// was allocated bigger than it really needs to be.
|
||||
Objects.checkIndex(ordinal, nextID.get());
|
||||
|
||||
int[] parents = getTaxoArrays().parents();
|
||||
assert ordinal < parents.length
|
||||
: "requested ordinal (" + ordinal + "); parents.length (" + parents.length + ") !";
|
||||
return parents[ordinal];
|
||||
ParallelTaxonomyArrays.IntArray parents = getTaxoArrays().parents();
|
||||
assert ordinal < parents.length()
|
||||
: "requested ordinal (" + ordinal + "); parents.length (" + parents.length() + ") !";
|
||||
return parents.get(ordinal);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -38,27 +38,53 @@ import org.apache.lucene.util.RamUsageEstimator;
|
|||
* @lucene.experimental
|
||||
*/
|
||||
class TaxonomyIndexArrays extends ParallelTaxonomyArrays implements Accountable {
|
||||
private static final int CHUNK_SIZE_BITS = 13;
|
||||
static final int CHUNK_SIZE = 1 << CHUNK_SIZE_BITS;
|
||||
private static final int CHUNK_MASK = CHUNK_SIZE - 1;
|
||||
|
||||
private final int[] parents;
|
||||
private final ChunkedIntArray parents;
|
||||
|
||||
// the following two arrays are lazily initialized. note that we only keep a
|
||||
// single boolean member as volatile, instead of declaring the arrays
|
||||
// volatile. the code guarantees that only after the boolean is set to true,
|
||||
// the arrays are returned.
|
||||
private volatile boolean initializedChildren = false;
|
||||
private int[] children, siblings;
|
||||
private ChunkedIntArray children, siblings;
|
||||
|
||||
/** Used by {@link #add(int, int)} after the array grew. */
|
||||
private TaxonomyIndexArrays(int[] parents) {
|
||||
this.parents = parents;
|
||||
static class ChunkedIntArray extends ParallelTaxonomyArrays.IntArray {
|
||||
final int[][] values;
|
||||
|
||||
private ChunkedIntArray(int[][] values) {
|
||||
this.values = values;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int get(int i) {
|
||||
return values[i >> CHUNK_SIZE_BITS][i & CHUNK_MASK];
|
||||
}
|
||||
|
||||
public void set(int i, int val) {
|
||||
values[i >> CHUNK_SIZE_BITS][i & CHUNK_MASK] = val;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int length() {
|
||||
return ((values.length - 1) << CHUNK_SIZE_BITS) + values[values.length - 1].length;
|
||||
}
|
||||
}
|
||||
|
||||
/** Used by {@link #add(int, int)} after the array grew. Also, used for testing. */
|
||||
TaxonomyIndexArrays(int[][] parents) {
|
||||
this.parents = new ChunkedIntArray(parents);
|
||||
}
|
||||
|
||||
public TaxonomyIndexArrays(IndexReader reader) throws IOException {
|
||||
parents = new int[reader.maxDoc()];
|
||||
if (parents.length > 0) {
|
||||
initParents(reader, 0);
|
||||
parents[0] = TaxonomyReader.INVALID_ORDINAL;
|
||||
int[][] parentArray = allocateChunkedArray(reader.maxDoc(), 0);
|
||||
if (parentArray.length > 0) {
|
||||
initParents(parentArray, reader, 0);
|
||||
parentArray[0][0] = TaxonomyReader.INVALID_ORDINAL;
|
||||
}
|
||||
parents = new ChunkedIntArray(parentArray);
|
||||
}
|
||||
|
||||
public TaxonomyIndexArrays(IndexReader reader, TaxonomyIndexArrays copyFrom) throws IOException {
|
||||
|
@ -68,25 +94,54 @@ class TaxonomyIndexArrays extends ParallelTaxonomyArrays implements Accountable
|
|||
// it may be caused if e.g. the taxonomy segments were merged, and so an updated
|
||||
// NRT reader was obtained, even though nothing was changed. this is not very likely
|
||||
// to happen.
|
||||
int[] copyParents = copyFrom.parents();
|
||||
this.parents = new int[reader.maxDoc()];
|
||||
System.arraycopy(copyParents, 0, parents, 0, copyParents.length);
|
||||
initParents(reader, copyParents.length);
|
||||
|
||||
int[][] parentArray = allocateChunkedArray(reader.maxDoc(), copyFrom.parents.values.length - 1);
|
||||
if (parentArray.length > 0) {
|
||||
copyChunkedArray(copyFrom.parents.values, parentArray);
|
||||
initParents(parentArray, reader, copyFrom.parents.length());
|
||||
}
|
||||
parents = new ChunkedIntArray(parentArray);
|
||||
if (copyFrom.initializedChildren) {
|
||||
initChildrenSiblings(copyFrom);
|
||||
}
|
||||
}
|
||||
|
||||
private static int[][] allocateChunkedArray(int size, int startFrom) {
|
||||
int chunkCount = (size >> CHUNK_SIZE_BITS) + 1;
|
||||
int[][] array = new int[chunkCount][];
|
||||
for (int i = startFrom; i < chunkCount - 1; i++) {
|
||||
array[i] = new int[CHUNK_SIZE];
|
||||
}
|
||||
array[chunkCount - 1] = new int[size & CHUNK_MASK];
|
||||
return array;
|
||||
}
|
||||
|
||||
private static void copyChunkedArray(int[][] oldArray, int[][] newArray) {
|
||||
// Copy all but the last (maybe partial) chunk from the old array
|
||||
if (oldArray.length > 1) {
|
||||
System.arraycopy(oldArray, 0, newArray, 0, oldArray.length - 1);
|
||||
}
|
||||
int[] lastCopyChunk = oldArray[oldArray.length - 1];
|
||||
System.arraycopy(lastCopyChunk, 0, newArray[oldArray.length - 1], 0, lastCopyChunk.length);
|
||||
}
|
||||
|
||||
private synchronized void initChildrenSiblings(TaxonomyIndexArrays copyFrom) {
|
||||
if (!initializedChildren) { // must do this check !
|
||||
children = new int[parents.length];
|
||||
siblings = new int[parents.length];
|
||||
int startFrom;
|
||||
if (copyFrom == null) {
|
||||
startFrom = 0;
|
||||
} else {
|
||||
startFrom = copyFrom.parents.values.length - 1;
|
||||
}
|
||||
int[][] childrenArray = allocateChunkedArray(parents.length(), startFrom);
|
||||
int[][] siblingsArray = allocateChunkedArray(parents.length(), startFrom);
|
||||
// Rely on these arrays being copied by reference, since we may modify them below
|
||||
children = new ChunkedIntArray(childrenArray);
|
||||
siblings = new ChunkedIntArray(siblingsArray);
|
||||
if (copyFrom != null) {
|
||||
// called from the ctor, after we know copyFrom has initialized children/siblings
|
||||
System.arraycopy(copyFrom.children(), 0, children, 0, copyFrom.children().length);
|
||||
System.arraycopy(copyFrom.siblings(), 0, siblings, 0, copyFrom.siblings().length);
|
||||
computeChildrenSiblings(copyFrom.parents.length);
|
||||
copyChunkedArray(copyFrom.children.values, childrenArray);
|
||||
copyChunkedArray(copyFrom.siblings.values, siblingsArray);
|
||||
computeChildrenSiblings(copyFrom.parents.length());
|
||||
} else {
|
||||
computeChildrenSiblings(0);
|
||||
}
|
||||
|
@ -98,26 +153,31 @@ class TaxonomyIndexArrays extends ParallelTaxonomyArrays implements Accountable
|
|||
// reset the youngest child of all ordinals. while this should be done only
|
||||
// for the leaves, we don't know up front which are the leaves, so we reset
|
||||
// all of them.
|
||||
for (int i = first; i < parents.length; i++) {
|
||||
children[i] = TaxonomyReader.INVALID_ORDINAL;
|
||||
int length = parents.length();
|
||||
for (int i = first; i < length; i++) {
|
||||
children.set(i, TaxonomyReader.INVALID_ORDINAL);
|
||||
}
|
||||
|
||||
// the root category has no parent, and therefore no siblings
|
||||
if (first == 0) {
|
||||
first = 1;
|
||||
siblings[0] = TaxonomyReader.INVALID_ORDINAL;
|
||||
siblings.set(0, TaxonomyReader.INVALID_ORDINAL);
|
||||
}
|
||||
|
||||
for (int i = first; i < parents.length; i++) {
|
||||
for (int i = first; i < length; i++) {
|
||||
int parent = parents.get(i);
|
||||
// The existing youngest child of the parent is the next older sibling of i.
|
||||
// note that parents[i] is always < i, so the right-hand-side of
|
||||
// the following line is already set when we get here
|
||||
siblings[i] = children[parents[i]];
|
||||
children[parents[i]] = i;
|
||||
siblings.set(i, children.get(parent));
|
||||
// The new youngest child of the parent is i.
|
||||
children.set(parent, i);
|
||||
}
|
||||
}
|
||||
|
||||
// Read the parents of the new categories
|
||||
private void initParents(IndexReader reader, int first) throws IOException {
|
||||
private static void initParents(int[][] parentsArray, IndexReader reader, int first)
|
||||
throws IOException {
|
||||
if (reader.maxDoc() == first) {
|
||||
return;
|
||||
}
|
||||
|
@ -141,7 +201,9 @@ class TaxonomyIndexArrays extends ParallelTaxonomyArrays implements Accountable
|
|||
throw new CorruptIndexException(
|
||||
"Missing parent data for category " + (doc + leafContext.docBase), reader.toString());
|
||||
}
|
||||
parents[doc + leafContext.docBase] = Math.toIntExact(parentValues.longValue());
|
||||
int pos = doc + leafContext.docBase;
|
||||
parentsArray[pos >> CHUNK_SIZE_BITS][pos & CHUNK_MASK] =
|
||||
Math.toIntExact(parentValues.longValue());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -153,12 +215,15 @@ class TaxonomyIndexArrays extends ParallelTaxonomyArrays implements Accountable
|
|||
* <p><b>NOTE:</b> you should call this method from a thread-safe code.
|
||||
*/
|
||||
TaxonomyIndexArrays add(int ordinal, int parentOrdinal) {
|
||||
if (ordinal >= parents.length) {
|
||||
int[] newarray = ArrayUtil.grow(parents, ordinal + 1);
|
||||
newarray[ordinal] = parentOrdinal;
|
||||
return new TaxonomyIndexArrays(newarray);
|
||||
if (ordinal >= parents.length()) {
|
||||
int[][] newParents =
|
||||
allocateChunkedArray(
|
||||
ArrayUtil.oversize(ordinal + 1, Integer.BYTES), parents.values.length - 1);
|
||||
copyChunkedArray(parents.values, newParents);
|
||||
newParents[ordinal >> CHUNK_SIZE_BITS][ordinal & CHUNK_MASK] = parentOrdinal;
|
||||
return new TaxonomyIndexArrays(newParents);
|
||||
}
|
||||
parents[ordinal] = parentOrdinal;
|
||||
parents.set(ordinal, parentOrdinal);
|
||||
return this;
|
||||
}
|
||||
|
||||
|
@ -167,7 +232,7 @@ class TaxonomyIndexArrays extends ParallelTaxonomyArrays implements Accountable
|
|||
* {@code i}.
|
||||
*/
|
||||
@Override
|
||||
public int[] parents() {
|
||||
public ChunkedIntArray parents() {
|
||||
return parents;
|
||||
}
|
||||
|
||||
|
@ -177,7 +242,7 @@ class TaxonomyIndexArrays extends ParallelTaxonomyArrays implements Accountable
|
|||
* taxonomy as an immediate child of {@code i}.
|
||||
*/
|
||||
@Override
|
||||
public int[] children() {
|
||||
public ChunkedIntArray children() {
|
||||
if (!initializedChildren) {
|
||||
initChildrenSiblings(null);
|
||||
}
|
||||
|
@ -191,7 +256,7 @@ class TaxonomyIndexArrays extends ParallelTaxonomyArrays implements Accountable
|
|||
* {@code i}. The sibling is defined as the previous youngest child of {@code parents[i]}.
|
||||
*/
|
||||
@Override
|
||||
public int[] siblings() {
|
||||
public ChunkedIntArray siblings() {
|
||||
if (!initializedChildren) {
|
||||
initChildrenSiblings(null);
|
||||
}
|
||||
|
|
|
@ -20,7 +20,8 @@ import java.io.IOException;
|
|||
import java.io.PrintWriter;
|
||||
import java.io.StringWriter;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
import org.apache.lucene.facet.FacetTestCase;
|
||||
import org.apache.lucene.facet.SlowDirectory;
|
||||
|
@ -304,7 +305,7 @@ public class TestTaxonomyCombined extends FacetTestCase {
|
|||
TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
|
||||
assertEquals(1, tr.getSize());
|
||||
assertEquals(0, tr.getPath(0).length);
|
||||
assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getParallelTaxonomyArrays().parents()[0]);
|
||||
assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getParallelTaxonomyArrays().parents().get(0));
|
||||
assertEquals(0, tr.getOrdinal(new FacetLabel()));
|
||||
tr.close();
|
||||
indexDir.close();
|
||||
|
@ -323,7 +324,7 @@ public class TestTaxonomyCombined extends FacetTestCase {
|
|||
TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
|
||||
assertEquals(1, tr.getSize());
|
||||
assertEquals(0, tr.getPath(0).length);
|
||||
assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getParallelTaxonomyArrays().parents()[0]);
|
||||
assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getParallelTaxonomyArrays().parents().get(0));
|
||||
assertEquals(0, tr.getOrdinal(new FacetLabel()));
|
||||
tw.close();
|
||||
tr.close();
|
||||
|
@ -412,13 +413,13 @@ public class TestTaxonomyCombined extends FacetTestCase {
|
|||
TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
|
||||
|
||||
// check that the parent of the root ordinal is the invalid ordinal:
|
||||
int[] parents = tr.getParallelTaxonomyArrays().parents();
|
||||
assertEquals(TaxonomyReader.INVALID_ORDINAL, parents[0]);
|
||||
ParallelTaxonomyArrays.IntArray parents = tr.getParallelTaxonomyArrays().parents();
|
||||
assertEquals(TaxonomyReader.INVALID_ORDINAL, parents.get(0));
|
||||
|
||||
// check parent of non-root ordinals:
|
||||
for (int ordinal = 1; ordinal < tr.getSize(); ordinal++) {
|
||||
FacetLabel me = tr.getPath(ordinal);
|
||||
int parentOrdinal = parents[ordinal];
|
||||
int parentOrdinal = parents.get(ordinal);
|
||||
FacetLabel parent = tr.getPath(parentOrdinal);
|
||||
if (parent == null) {
|
||||
fail(
|
||||
|
@ -552,10 +553,10 @@ public class TestTaxonomyCombined extends FacetTestCase {
|
|||
tw.close();
|
||||
TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
|
||||
ParallelTaxonomyArrays ca = tr.getParallelTaxonomyArrays();
|
||||
int[] youngestChildArray = ca.children();
|
||||
assertEquals(tr.getSize(), youngestChildArray.length);
|
||||
int[] olderSiblingArray = ca.siblings();
|
||||
assertEquals(tr.getSize(), olderSiblingArray.length);
|
||||
ParallelTaxonomyArrays.IntArray youngestChildArray = ca.children();
|
||||
assertEquals(tr.getSize(), youngestChildArray.length());
|
||||
ParallelTaxonomyArrays.IntArray olderSiblingArray = ca.siblings();
|
||||
assertEquals(tr.getSize(), olderSiblingArray.length());
|
||||
for (int i = 0; i < expectedCategories.length; i++) {
|
||||
// find expected children by looking at all expectedCategories
|
||||
// for children
|
||||
|
@ -578,12 +579,12 @@ public class TestTaxonomyCombined extends FacetTestCase {
|
|||
// check that children and expectedChildren are the same, with the
|
||||
// correct reverse (youngest to oldest) order:
|
||||
if (expectedChildren.size() == 0) {
|
||||
assertEquals(TaxonomyReader.INVALID_ORDINAL, youngestChildArray[i]);
|
||||
assertEquals(TaxonomyReader.INVALID_ORDINAL, youngestChildArray.get(i));
|
||||
} else {
|
||||
int child = youngestChildArray[i];
|
||||
int child = youngestChildArray.get(i);
|
||||
assertEquals(expectedChildren.get(0).intValue(), child);
|
||||
for (int j = 1; j < expectedChildren.size(); j++) {
|
||||
child = olderSiblingArray[child];
|
||||
child = olderSiblingArray.get(child);
|
||||
assertEquals(expectedChildren.get(j).intValue(), child);
|
||||
// if child is INVALID_ORDINAL we should stop, but
|
||||
// assertEquals would fail in this case anyway.
|
||||
|
@ -591,7 +592,7 @@ public class TestTaxonomyCombined extends FacetTestCase {
|
|||
// When we're done comparing, olderSiblingArray should now point
|
||||
// to INVALID_ORDINAL, saying there are no more children. If it
|
||||
// doesn't, we found too many children...
|
||||
assertEquals(-1, olderSiblingArray[child]);
|
||||
assertEquals(-1, olderSiblingArray.get(child));
|
||||
}
|
||||
}
|
||||
tr.close();
|
||||
|
@ -613,34 +614,34 @@ public class TestTaxonomyCombined extends FacetTestCase {
|
|||
tw.close();
|
||||
TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
|
||||
ParallelTaxonomyArrays ca = tr.getParallelTaxonomyArrays();
|
||||
int[] children = ca.children();
|
||||
assertEquals(tr.getSize(), children.length);
|
||||
int[] olderSiblingArray = ca.siblings();
|
||||
assertEquals(tr.getSize(), olderSiblingArray.length);
|
||||
ParallelTaxonomyArrays.IntArray children = ca.children();
|
||||
assertEquals(tr.getSize(), children.length());
|
||||
ParallelTaxonomyArrays.IntArray olderSiblingArray = ca.siblings();
|
||||
assertEquals(tr.getSize(), olderSiblingArray.length());
|
||||
|
||||
// test that the "youngest child" of every category is indeed a child:
|
||||
int[] parents = tr.getParallelTaxonomyArrays().parents();
|
||||
ParallelTaxonomyArrays.IntArray parents = tr.getParallelTaxonomyArrays().parents();
|
||||
for (int i = 0; i < tr.getSize(); i++) {
|
||||
int youngestChild = children[i];
|
||||
int youngestChild = children.get(i);
|
||||
if (youngestChild != TaxonomyReader.INVALID_ORDINAL) {
|
||||
assertEquals(i, parents[youngestChild]);
|
||||
assertEquals(i, parents.get(youngestChild));
|
||||
}
|
||||
}
|
||||
|
||||
// test that the "older sibling" of every category is indeed older (lower)
|
||||
// (it can also be INVALID_ORDINAL, which is lower than any ordinal)
|
||||
for (int i = 0; i < tr.getSize(); i++) {
|
||||
assertTrue("olderSiblingArray[" + i + "] should be <" + i, olderSiblingArray[i] < i);
|
||||
assertTrue("olderSiblingArray[" + i + "] should be <" + i, olderSiblingArray.get(i) < i);
|
||||
}
|
||||
|
||||
// test that the "older sibling" of every category is indeed a sibling
|
||||
// (they share the same parent)
|
||||
for (int i = 0; i < tr.getSize(); i++) {
|
||||
int sibling = olderSiblingArray[i];
|
||||
int sibling = olderSiblingArray.get(i);
|
||||
if (sibling == TaxonomyReader.INVALID_ORDINAL) {
|
||||
continue;
|
||||
}
|
||||
assertEquals(parents[i], parents[sibling]);
|
||||
assertEquals(parents.get(i), parents.get(sibling));
|
||||
}
|
||||
|
||||
// And now for slightly more complex (and less "invariant-like"...)
|
||||
|
@ -652,14 +653,14 @@ public class TestTaxonomyCombined extends FacetTestCase {
|
|||
// Find the really youngest child:
|
||||
int j;
|
||||
for (j = tr.getSize() - 1; j > i; j--) {
|
||||
if (parents[j] == i) {
|
||||
if (parents.get(j) == i) {
|
||||
break; // found youngest child
|
||||
}
|
||||
}
|
||||
if (j == i) { // no child found
|
||||
j = TaxonomyReader.INVALID_ORDINAL;
|
||||
}
|
||||
assertEquals(j, children[i]);
|
||||
assertEquals(j, children.get(i));
|
||||
}
|
||||
|
||||
// test that the "older sibling" is indeed the least oldest one - and
|
||||
|
@ -669,20 +670,26 @@ public class TestTaxonomyCombined extends FacetTestCase {
|
|||
// Find the youngest older sibling:
|
||||
int j;
|
||||
for (j = i - 1; j >= 0; j--) {
|
||||
if (parents[j] == parents[i]) {
|
||||
if (parents.get(j) == parents.get(i)) {
|
||||
break; // found youngest older sibling
|
||||
}
|
||||
}
|
||||
if (j < 0) { // no sibling found
|
||||
j = TaxonomyReader.INVALID_ORDINAL;
|
||||
}
|
||||
assertEquals(j, olderSiblingArray[i]);
|
||||
assertEquals(j, olderSiblingArray.get(i));
|
||||
}
|
||||
|
||||
tr.close();
|
||||
indexDir.close();
|
||||
}
|
||||
|
||||
private static void assertArrayEquals(int[] expected, ParallelTaxonomyArrays.IntArray actual) {
|
||||
for (int i = 0; i < expected.length; i++) {
|
||||
assertEquals(expected[i], actual.get(i));
|
||||
}
|
||||
}
|
||||
|
||||
/** Test how getChildrenArrays() deals with the taxonomy's growth: */
|
||||
@Test
|
||||
public void testChildrenArraysGrowth() throws Exception {
|
||||
|
@ -693,10 +700,10 @@ public class TestTaxonomyCombined extends FacetTestCase {
|
|||
TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
|
||||
ParallelTaxonomyArrays ca = tr.getParallelTaxonomyArrays();
|
||||
assertEquals(3, tr.getSize());
|
||||
assertEquals(3, ca.siblings().length);
|
||||
assertEquals(3, ca.children().length);
|
||||
assertTrue(Arrays.equals(new int[] {1, 2, -1}, ca.children()));
|
||||
assertTrue(Arrays.equals(new int[] {-1, -1, -1}, ca.siblings()));
|
||||
assertEquals(3, ca.siblings().length());
|
||||
assertEquals(3, ca.children().length());
|
||||
assertArrayEquals(new int[] {1, 2, -1}, ca.children());
|
||||
assertArrayEquals(new int[] {-1, -1, -1}, ca.siblings());
|
||||
tw.addCategory(new FacetLabel("hi", "ho"));
|
||||
tw.addCategory(new FacetLabel("hello"));
|
||||
tw.commit();
|
||||
|
@ -704,8 +711,8 @@ public class TestTaxonomyCombined extends FacetTestCase {
|
|||
ParallelTaxonomyArrays newca = tr.getParallelTaxonomyArrays();
|
||||
assertSame(newca, ca); // we got exactly the same object
|
||||
assertEquals(3, tr.getSize());
|
||||
assertEquals(3, ca.siblings().length);
|
||||
assertEquals(3, ca.children().length);
|
||||
assertEquals(3, ca.siblings().length());
|
||||
assertEquals(3, ca.children().length());
|
||||
// After the refresh, things change:
|
||||
TaxonomyReader newtr = TaxonomyReader.openIfChanged(tr);
|
||||
assertNotNull(newtr);
|
||||
|
@ -713,10 +720,10 @@ public class TestTaxonomyCombined extends FacetTestCase {
|
|||
tr = newtr;
|
||||
ca = tr.getParallelTaxonomyArrays();
|
||||
assertEquals(5, tr.getSize());
|
||||
assertEquals(5, ca.siblings().length);
|
||||
assertEquals(5, ca.children().length);
|
||||
assertTrue(Arrays.equals(new int[] {4, 3, -1, -1, -1}, ca.children()));
|
||||
assertTrue(Arrays.equals(new int[] {-1, -1, -1, 2, 1}, ca.siblings()));
|
||||
assertEquals(5, ca.siblings().length());
|
||||
assertEquals(5, ca.children().length());
|
||||
assertArrayEquals(new int[] {4, 3, -1, -1, -1}, ca.children());
|
||||
assertArrayEquals(new int[] {-1, -1, -1, 2, 1}, ca.siblings());
|
||||
tw.close();
|
||||
tr.close();
|
||||
indexDir.close();
|
||||
|
@ -737,7 +744,7 @@ public class TestTaxonomyCombined extends FacetTestCase {
|
|||
final ParallelTaxonomyArrays ca1 = trBase.getParallelTaxonomyArrays();
|
||||
|
||||
final int abOrd = trBase.getOrdinal(abPath);
|
||||
final int abYoungChildBase1 = ca1.children()[abOrd];
|
||||
final int abYoungChildBase1 = ca1.children().get(abOrd);
|
||||
|
||||
final int numCategories = atLeast(200);
|
||||
for (int i = 0; i < numCategories; i++) {
|
||||
|
@ -751,7 +758,7 @@ public class TestTaxonomyCombined extends FacetTestCase {
|
|||
trBase = newTaxoReader;
|
||||
|
||||
final ParallelTaxonomyArrays ca2 = trBase.getParallelTaxonomyArrays();
|
||||
final int abYoungChildBase2 = ca2.children()[abOrd];
|
||||
final int abYoungChildBase2 = ca2.children().get(abOrd);
|
||||
|
||||
int numRetries = atLeast(10);
|
||||
for (int retry = 0; retry < numRetries; retry++) {
|
||||
|
@ -799,7 +806,7 @@ public class TestTaxonomyCombined extends FacetTestCase {
|
|||
setPriority(1 + getPriority());
|
||||
try {
|
||||
while (!stop.get()) {
|
||||
int lastOrd = tr.getParallelTaxonomyArrays().parents().length - 1;
|
||||
int lastOrd = tr.getParallelTaxonomyArrays().parents().length() - 1;
|
||||
assertNotNull(
|
||||
"path of last-ord " + lastOrd + " is not found!", tr.getPath(lastOrd));
|
||||
assertChildrenArrays(tr.getParallelTaxonomyArrays(), retry, retrieval[0]++);
|
||||
|
@ -812,7 +819,7 @@ public class TestTaxonomyCombined extends FacetTestCase {
|
|||
}
|
||||
|
||||
private void assertChildrenArrays(ParallelTaxonomyArrays ca, int retry, int retrieval) {
|
||||
final int abYoungChild = ca.children()[abOrd];
|
||||
final int abYoungChild = ca.children().get(abOrd);
|
||||
assertTrue(
|
||||
"Retry "
|
||||
+ retry
|
||||
|
@ -828,7 +835,7 @@ public class TestTaxonomyCombined extends FacetTestCase {
|
|||
+ abYoungChildBase2
|
||||
+ " but was: "
|
||||
+ abYoungChild,
|
||||
abYoungChildBase1 == abYoungChild || abYoungChildBase2 == ca.children()[abOrd]);
|
||||
abYoungChildBase1 == abYoungChild || abYoungChildBase2 == ca.children().get(abOrd));
|
||||
}
|
||||
};
|
||||
thread.start();
|
||||
|
@ -903,7 +910,8 @@ public class TestTaxonomyCombined extends FacetTestCase {
|
|||
|
||||
int author = 1;
|
||||
try {
|
||||
assertEquals(TaxonomyReader.ROOT_ORDINAL, tr.getParallelTaxonomyArrays().parents()[author]);
|
||||
assertEquals(
|
||||
TaxonomyReader.ROOT_ORDINAL, tr.getParallelTaxonomyArrays().parents().get(author));
|
||||
// ok
|
||||
} catch (
|
||||
@SuppressWarnings("unused")
|
||||
|
@ -926,10 +934,10 @@ public class TestTaxonomyCombined extends FacetTestCase {
|
|||
assertNotNull(newTaxoReader);
|
||||
tr.close();
|
||||
tr = newTaxoReader;
|
||||
int[] parents = tr.getParallelTaxonomyArrays().parents();
|
||||
assertEquals(author, parents[dawkins]);
|
||||
assertEquals(TaxonomyReader.ROOT_ORDINAL, parents[author]);
|
||||
assertEquals(TaxonomyReader.INVALID_ORDINAL, parents[TaxonomyReader.ROOT_ORDINAL]);
|
||||
ParallelTaxonomyArrays.IntArray parents = tr.getParallelTaxonomyArrays().parents();
|
||||
assertEquals(author, parents.get(dawkins));
|
||||
assertEquals(TaxonomyReader.ROOT_ORDINAL, parents.get(author));
|
||||
assertEquals(TaxonomyReader.INVALID_ORDINAL, parents.get(TaxonomyReader.ROOT_ORDINAL));
|
||||
assertEquals(3, tr.getSize());
|
||||
tw.close();
|
||||
tr.close();
|
||||
|
@ -1097,6 +1105,64 @@ public class TestTaxonomyCombined extends FacetTestCase {
|
|||
dir.close();
|
||||
}
|
||||
|
||||
private static String[][] manyCategories(int count, int roundSize) {
|
||||
String[][] result = new String[count / roundSize + 1][];
|
||||
int k = 0;
|
||||
do {
|
||||
k += roundSize;
|
||||
List<String> round = new ArrayList<>();
|
||||
for (int i = k - roundSize + 1; i <= k && i < count; i++) {
|
||||
round.add(String.format(Locale.ROOT, "category %d of %d", i, k));
|
||||
}
|
||||
result[k / roundSize - 1] = round.toArray(new String[0]);
|
||||
} while (k <= count);
|
||||
return result;
|
||||
}
|
||||
|
||||
public void testThousandsOfCategories() throws IOException {
|
||||
int roundSize = random().nextInt(2, 4);
|
||||
int size = random().nextInt(16384, 32768);
|
||||
Directory indexDir = newDirectory();
|
||||
TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
|
||||
String[][] manyCategories = manyCategories(size, roundSize);
|
||||
for (String[] elem : manyCategories) {
|
||||
if (elem == null) {
|
||||
throw new IllegalStateException(
|
||||
"Got null array with size = " + size + " and roundSize = " + roundSize);
|
||||
} else if (elem.length > 0) {
|
||||
tw.addCategory(new FacetLabel(elem));
|
||||
}
|
||||
}
|
||||
tw.close();
|
||||
TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
|
||||
ParallelTaxonomyArrays ca = tr.getParallelTaxonomyArrays();
|
||||
ParallelTaxonomyArrays.IntArray parents = ca.parents();
|
||||
ParallelTaxonomyArrays.IntArray children = ca.children();
|
||||
assertEquals(size, parents.length());
|
||||
assertEquals(size, children.length());
|
||||
for (int j = 1; j < size - roundSize; j += roundSize) {
|
||||
// Top level categories all have root as their parent.
|
||||
assertEquals(0, parents.get(j));
|
||||
for (int i = j; i < j + roundSize - 1; i++) {
|
||||
// Children extend in a chain from the top level category.
|
||||
// The parent/child relationships are symmetric.
|
||||
assertEquals(i + 1, children.get(i));
|
||||
if (i > j) {
|
||||
assertEquals(i - 1, parents.get(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
ParallelTaxonomyArrays.IntArray siblings = ca.siblings();
|
||||
assertEquals(size, siblings.length());
|
||||
for (int i = 1; i < size - roundSize; i += roundSize) {
|
||||
// Each top-level category (after the first) has the previous top-level category as their
|
||||
// older sibling.
|
||||
assertEquals(i, siblings.get(i + roundSize));
|
||||
}
|
||||
tr.close();
|
||||
indexDir.close();
|
||||
}
|
||||
|
||||
// TODO (Facet): test multiple readers, one writer. Have the multiple readers
|
||||
// using the same object (simulating threads) or different objects
|
||||
// (simulating processes).
|
||||
|
|
|
@ -25,6 +25,7 @@ import org.apache.lucene.facet.FacetField;
|
|||
import org.apache.lucene.facet.FacetTestCase;
|
||||
import org.apache.lucene.facet.FacetsConfig;
|
||||
import org.apache.lucene.facet.taxonomy.FacetLabel;
|
||||
import org.apache.lucene.facet.taxonomy.ParallelTaxonomyArrays;
|
||||
import org.apache.lucene.facet.taxonomy.writercache.LruTaxonomyWriterCache;
|
||||
import org.apache.lucene.facet.taxonomy.writercache.TaxonomyWriterCache;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
|
@ -153,7 +154,7 @@ public class TestConcurrentFacetedIndexing extends FacetTestCase {
|
|||
}
|
||||
fail("mismatch number of categories");
|
||||
}
|
||||
int[] parents = tr.getParallelTaxonomyArrays().parents();
|
||||
ParallelTaxonomyArrays.IntArray parents = tr.getParallelTaxonomyArrays().parents();
|
||||
for (String cat : values.keySet()) {
|
||||
FacetLabel cp = new FacetLabel(FacetsConfig.stringToPath(cat));
|
||||
assertTrue("category not found " + cp, tr.getOrdinal(cp) > 0);
|
||||
|
@ -163,7 +164,7 @@ public class TestConcurrentFacetedIndexing extends FacetTestCase {
|
|||
for (int i = 0; i < level; i++) {
|
||||
path = cp.subpath(i + 1);
|
||||
int ord = tr.getOrdinal(path);
|
||||
assertEquals("invalid parent for cp=" + path, parentOrd, parents[ord]);
|
||||
assertEquals("invalid parent for cp=" + path, parentOrd, parents.get(ord));
|
||||
parentOrd = ord; // next level should have this parent
|
||||
}
|
||||
}
|
||||
|
|
|
@ -31,6 +31,7 @@ import java.util.Set;
|
|||
import java.util.stream.IntStream;
|
||||
import org.apache.lucene.facet.FacetTestCase;
|
||||
import org.apache.lucene.facet.taxonomy.FacetLabel;
|
||||
import org.apache.lucene.facet.taxonomy.ParallelTaxonomyArrays;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader.ChildrenIterator;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
|
||||
|
@ -248,11 +249,11 @@ public class TestDirectoryTaxonomyReader extends FacetTestCase {
|
|||
// assert categories
|
||||
assertEquals(numCategories, reader.getSize());
|
||||
int roundOrdinal = reader.getOrdinal(new FacetLabel(Integer.toString(i)));
|
||||
int[] parents = reader.getParallelTaxonomyArrays().parents();
|
||||
assertEquals(0, parents[roundOrdinal]); // round's parent is root
|
||||
ParallelTaxonomyArrays.IntArray parents = reader.getParallelTaxonomyArrays().parents();
|
||||
assertEquals(0, parents.get(roundOrdinal)); // round's parent is root
|
||||
for (int j = 0; j < numCats; j++) {
|
||||
int ord = reader.getOrdinal(new FacetLabel(Integer.toString(i), Integer.toString(j)));
|
||||
assertEquals(roundOrdinal, parents[ord]); // round's parent is root
|
||||
assertEquals(roundOrdinal, parents.get(ord)); // round's parent is root
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -286,7 +287,7 @@ public class TestDirectoryTaxonomyReader extends FacetTestCase {
|
|||
|
||||
TaxonomyReader reader = new DirectoryTaxonomyReader(writer);
|
||||
assertEquals(1, reader.getSize());
|
||||
assertEquals(1, reader.getParallelTaxonomyArrays().parents().length);
|
||||
assertEquals(1, reader.getParallelTaxonomyArrays().parents().length());
|
||||
|
||||
// add category and call forceMerge -- this should flush IW and merge segments down to 1
|
||||
// in ParentArray.initFromReader, this used to fail assuming there are no parents.
|
||||
|
@ -299,7 +300,7 @@ public class TestDirectoryTaxonomyReader extends FacetTestCase {
|
|||
reader.close();
|
||||
reader = newtr;
|
||||
assertEquals(2, reader.getSize());
|
||||
assertEquals(2, reader.getParallelTaxonomyArrays().parents().length);
|
||||
assertEquals(2, reader.getParallelTaxonomyArrays().parents().length());
|
||||
|
||||
reader.close();
|
||||
writer.close();
|
||||
|
@ -336,7 +337,7 @@ public class TestDirectoryTaxonomyReader extends FacetTestCase {
|
|||
|
||||
TaxonomyReader reader = new DirectoryTaxonomyReader(writer);
|
||||
assertEquals(2, reader.getSize());
|
||||
assertEquals(2, reader.getParallelTaxonomyArrays().parents().length);
|
||||
assertEquals(2, reader.getParallelTaxonomyArrays().parents().length());
|
||||
|
||||
// merge all the segments so that NRT reader thinks there's a change
|
||||
iw.forceMerge(1);
|
||||
|
@ -347,7 +348,7 @@ public class TestDirectoryTaxonomyReader extends FacetTestCase {
|
|||
reader.close();
|
||||
reader = newtr;
|
||||
assertEquals(2, reader.getSize());
|
||||
assertEquals(2, reader.getParallelTaxonomyArrays().parents().length);
|
||||
assertEquals(2, reader.getParallelTaxonomyArrays().parents().length());
|
||||
|
||||
reader.close();
|
||||
writer.close();
|
||||
|
|
|
@ -29,6 +29,7 @@ import org.apache.lucene.facet.FacetField;
|
|||
import org.apache.lucene.facet.FacetTestCase;
|
||||
import org.apache.lucene.facet.FacetsConfig;
|
||||
import org.apache.lucene.facet.taxonomy.FacetLabel;
|
||||
import org.apache.lucene.facet.taxonomy.ParallelTaxonomyArrays;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.MemoryOrdinalMap;
|
||||
import org.apache.lucene.facet.taxonomy.writercache.LruTaxonomyWriterCache;
|
||||
|
@ -366,7 +367,7 @@ public class TestDirectoryTaxonomyWriter extends FacetTestCase {
|
|||
fail("mismatch number of categories");
|
||||
}
|
||||
|
||||
int[] parents = dtr.getParallelTaxonomyArrays().parents();
|
||||
ParallelTaxonomyArrays.IntArray parents = dtr.getParallelTaxonomyArrays().parents();
|
||||
for (String cat : values.keySet()) {
|
||||
FacetLabel cp = new FacetLabel(FacetsConfig.stringToPath(cat));
|
||||
assertTrue("category not found " + cp, dtr.getOrdinal(cp) > 0);
|
||||
|
@ -376,7 +377,7 @@ public class TestDirectoryTaxonomyWriter extends FacetTestCase {
|
|||
for (int i = 0; i < level; i++) {
|
||||
path = cp.subpath(i + 1);
|
||||
int ord = dtr.getOrdinal(path);
|
||||
assertEquals("invalid parent for cp=" + path, parentOrd, parents[ord]);
|
||||
assertEquals("invalid parent for cp=" + path, parentOrd, parents.get(ord));
|
||||
parentOrd = ord; // next level should have this parent
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,62 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*/
|
||||
package org.apache.lucene.facet.taxonomy.directory;
|
||||
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
import org.apache.lucene.tests.util.LuceneTestCase;
|
||||
|
||||
public class TestTaxonomyIndexArrays extends LuceneTestCase {
|
||||
|
||||
private void checkInvariants(TaxonomyIndexArrays oldArray, TaxonomyIndexArrays newArray) {
|
||||
TaxonomyIndexArrays.ChunkedIntArray oldParents = oldArray.parents();
|
||||
TaxonomyIndexArrays.ChunkedIntArray newParents = newArray.parents();
|
||||
for (int i = 0; i < oldParents.values.length - 1; i++) {
|
||||
assertSame(oldParents.values[i], newParents.values[i]);
|
||||
}
|
||||
int lastOldChunk = oldParents.values.length - 1;
|
||||
for (int i = 0; i < oldParents.values[lastOldChunk].length; i++) {
|
||||
assertEquals(oldParents.values[lastOldChunk][i], newParents.values[lastOldChunk][i]);
|
||||
}
|
||||
}
|
||||
|
||||
public void testRandom() {
|
||||
TaxonomyIndexArrays oldArray =
|
||||
new TaxonomyIndexArrays(new int[][] {new int[] {TaxonomyReader.INVALID_ORDINAL}});
|
||||
int numIterations = 100;
|
||||
int ordinal = 1;
|
||||
for (int i = 0; i < numIterations; i++) {
|
||||
int newOrdinal = ordinal + random().nextInt(TaxonomyIndexArrays.CHUNK_SIZE);
|
||||
TaxonomyIndexArrays newArray = oldArray.add(newOrdinal, ordinal);
|
||||
checkInvariants(oldArray, newArray);
|
||||
ordinal = newOrdinal;
|
||||
}
|
||||
}
|
||||
|
||||
public void testMultiplesOfChunkSize() {
|
||||
TaxonomyIndexArrays oldArray =
|
||||
new TaxonomyIndexArrays(new int[][] {new int[] {TaxonomyReader.INVALID_ORDINAL}});
|
||||
int numIterations = 20;
|
||||
int ordinal = TaxonomyIndexArrays.CHUNK_SIZE;
|
||||
for (int i = 0; i < numIterations; i++) {
|
||||
int newOrdinal = ordinal + TaxonomyIndexArrays.CHUNK_SIZE;
|
||||
TaxonomyIndexArrays newArray = oldArray.add(newOrdinal, ordinal);
|
||||
checkInvariants(oldArray, newArray);
|
||||
ordinal = newOrdinal;
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue