diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 89edb6e5b90..06cacbee487 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -214,6 +214,8 @@ Optimizations * GITHUB#:12997 Avoid reset BlockDocsEnum#freqBuffer when indexHasFreq is false. (Zhang Chao, Adrien Grand) +* GITHUB#12989: Split taxonomy facet arrays across reusable chunks of elements to reduce allocations. (Michael Froh, Stefan Vodita) + Bug Fixes --------------------- * GITHUB#12866: Prevent extra similarity computation for single-level HNSW graphs. (Kaival Parikh) diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FloatTaxonomyFacets.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FloatTaxonomyFacets.java index fcf74c22141..bae77ed31d4 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FloatTaxonomyFacets.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FloatTaxonomyFacets.java @@ -73,7 +73,7 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets { } // Rollup any necessary dims: - int[] children = getChildren(); + ParallelTaxonomyArrays.IntArray children = getChildren(); for (Map.Entry ent : config.getDimConfigs().entrySet()) { String dim = ent.getKey(); DimConfig ft = ent.getValue(); @@ -81,21 +81,21 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets { int dimRootOrd = taxoReader.getOrdinal(new FacetLabel(dim)); assert dimRootOrd > 0; float newValue = - aggregationFunction.aggregate(values[dimRootOrd], rollup(children[dimRootOrd])); + aggregationFunction.aggregate(values[dimRootOrd], rollup(children.get(dimRootOrd))); values[dimRootOrd] = newValue; } } } private float rollup(int ord) throws IOException { - int[] children = getChildren(); - int[] siblings = getSiblings(); + ParallelTaxonomyArrays.IntArray children = getChildren(); + ParallelTaxonomyArrays.IntArray siblings = getSiblings(); float aggregationValue = 0f; while (ord != TaxonomyReader.INVALID_ORDINAL) { - float childValue = 
aggregationFunction.aggregate(values[ord], rollup(children[ord])); + float childValue = aggregationFunction.aggregate(values[ord], rollup(children.get(ord))); values[ord] = childValue; aggregationValue = aggregationFunction.aggregate(aggregationValue, childValue); - ord = siblings[ord]; + ord = siblings.get(ord); } return aggregationValue; } @@ -133,10 +133,10 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets { return null; } - int[] children = getChildren(); - int[] siblings = getSiblings(); + ParallelTaxonomyArrays.IntArray children = getChildren(); + ParallelTaxonomyArrays.IntArray siblings = getSiblings(); - int ord = children[dimOrd]; + int ord = children.get(dimOrd); float aggregatedValue = 0; IntArrayList ordinals = new IntArrayList(); @@ -148,7 +148,7 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets { ordinals.add(ord); ordValues.add(values[ord]); } - ord = siblings[ord]; + ord = siblings.get(ord); } if (aggregatedValue == 0) { @@ -206,10 +206,10 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets { float bottomValue = 0; int bottomOrd = Integer.MAX_VALUE; - int[] children = getChildren(); - int[] siblings = getSiblings(); + ParallelTaxonomyArrays.IntArray children = getChildren(); + ParallelTaxonomyArrays.IntArray siblings = getSiblings(); - int ord = children[pathOrd]; + int ord = children.get(pathOrd); float aggregatedValue = 0; int childCount = 0; @@ -233,7 +233,7 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets { } } - ord = siblings[ord]; + ord = siblings.get(ord); } if (dimConfig.multiValued) { @@ -294,8 +294,8 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets { } // get existing children and siblings ordinal array from TaxonomyFacets - int[] children = getChildren(); - int[] siblings = getSiblings(); + ParallelTaxonomyArrays.IntArray children = getChildren(); + ParallelTaxonomyArrays.IntArray siblings = getSiblings(); // Create priority queue to store top dimensions and sort by their aggregated 
values/hits and // string values. @@ -317,7 +317,7 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets { Map intermediateResults = null; // iterate over children and siblings ordinals for all dims - int ord = children[TaxonomyReader.ROOT_ORDINAL]; + int ord = children.get(TaxonomyReader.ROOT_ORDINAL); while (ord != TaxonomyReader.INVALID_ORDINAL) { String dim = taxoReader.getPath(ord).components[0]; FacetsConfig.DimConfig dimConfig = config.getDimConfig(dim); @@ -364,7 +364,7 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets { } } } - ord = siblings[ord]; + ord = siblings.get(ord); } FacetResult[] results = new FacetResult[pq.size()]; diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/IntTaxonomyFacets.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/IntTaxonomyFacets.java index 3a26d83000b..fa483f17a61 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/IntTaxonomyFacets.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/IntTaxonomyFacets.java @@ -104,7 +104,7 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets { } // Rollup any necessary dims: - int[] children = null; + ParallelTaxonomyArrays.IntArray children = null; for (Map.Entry ent : config.getDimConfigs().entrySet()) { String dim = ent.getKey(); DimConfig ft = ent.getValue(); @@ -118,7 +118,8 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets { children = getChildren(); } int currentValue = getValue(dimRootOrd); - int newValue = aggregationFunction.aggregate(currentValue, rollup(children[dimRootOrd])); + int newValue = + aggregationFunction.aggregate(currentValue, rollup(children.get(dimRootOrd))); setValue(dimRootOrd, newValue); } } @@ -126,15 +127,15 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets { } private int rollup(int ord) throws IOException { - int[] children = getChildren(); - int[] siblings = getSiblings(); + ParallelTaxonomyArrays.IntArray children = getChildren(); + 
ParallelTaxonomyArrays.IntArray siblings = getSiblings(); int aggregatedValue = 0; while (ord != TaxonomyReader.INVALID_ORDINAL) { int currentValue = getValue(ord); - int newValue = aggregationFunction.aggregate(currentValue, rollup(children[ord])); + int newValue = aggregationFunction.aggregate(currentValue, rollup(children.get(ord))); setValue(ord, newValue); aggregatedValue = aggregationFunction.aggregate(aggregatedValue, getValue(ord)); - ord = siblings[ord]; + ord = siblings.get(ord); } return aggregatedValue; } @@ -204,16 +205,16 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets { for (IntIntCursor c : sparseValues) { int value = c.value; int ord = c.key; - if (parents[ord] == dimOrd && value > 0) { + if (parents.get(ord) == dimOrd && value > 0) { aggregatedValue = aggregationFunction.aggregate(aggregatedValue, value); ordinals.add(ord); ordValues.add(value); } } } else { - int[] children = getChildren(); - int[] siblings = getSiblings(); - int ord = children[dimOrd]; + ParallelTaxonomyArrays.IntArray children = getChildren(); + ParallelTaxonomyArrays.IntArray siblings = getSiblings(); + int ord = children.get(dimOrd); while (ord != TaxonomyReader.INVALID_ORDINAL) { int value = values[ord]; if (value > 0) { @@ -221,7 +222,7 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets { ordinals.add(ord); ordValues.add(value); } - ord = siblings[ord]; + ord = siblings.get(ord); } } @@ -289,7 +290,7 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets { for (IntIntCursor c : sparseValues) { int value = c.value; int ord = c.key; - if (parents[ord] == pathOrd && value > 0) { + if (parents.get(ord) == pathOrd && value > 0) { aggregatedValue = aggregationFunction.aggregate(aggregatedValue, value); childCount++; if (value > bottomValue || (value == bottomValue && ord < bottomOrd)) { @@ -307,9 +308,9 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets { } } } else { - int[] children = getChildren(); - int[] siblings = getSiblings(); - int ord = 
children[pathOrd]; + ParallelTaxonomyArrays.IntArray children = getChildren(); + ParallelTaxonomyArrays.IntArray siblings = getSiblings(); + int ord = children.get(pathOrd); while (ord != TaxonomyReader.INVALID_ORDINAL) { int value = values[ord]; if (value > 0) { @@ -328,7 +329,7 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets { } } } - ord = siblings[ord]; + ord = siblings.get(ord); } } @@ -355,8 +356,8 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets { } // get children and siblings ordinal array from TaxonomyFacets - int[] children = getChildren(); - int[] siblings = getSiblings(); + ParallelTaxonomyArrays.IntArray children = getChildren(); + ParallelTaxonomyArrays.IntArray siblings = getSiblings(); // Create priority queue to store top dimensions and sort by their aggregated values/hits and // string values. @@ -378,7 +379,7 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets { Map intermediateResults = null; // iterate over children and siblings ordinals for all dims - int ord = children[TaxonomyReader.ROOT_ORDINAL]; + int ord = children.get(TaxonomyReader.ROOT_ORDINAL); while (ord != TaxonomyReader.INVALID_ORDINAL) { String dim = taxoReader.getPath(ord).components[0]; FacetsConfig.DimConfig dimConfig = config.getDimConfig(dim); @@ -425,7 +426,7 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets { } } } - ord = siblings[ord]; + ord = siblings.get(ord); } FacetResult[] results = new FacetResult[pq.size()]; diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/ParallelTaxonomyArrays.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/ParallelTaxonomyArrays.java index 30970a0157e..3cae083b2c4 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/ParallelTaxonomyArrays.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/ParallelTaxonomyArrays.java @@ -35,6 +35,26 @@ package org.apache.lucene.facet.taxonomy; * @lucene.experimental */ public abstract class ParallelTaxonomyArrays 
{ + /** Abstraction that looks like an int[], but read-only. */ + public abstract static class IntArray { + /** Sole constructor * */ + public IntArray() {} + + /** + * Equivalent to array[i]. + * + * @param i the index of the value to retrieve + * @return the value at position i + */ + public abstract int get(int i); + + /** + * Equivalent to array.length. + * + * @return the allocated size of the array + */ + public abstract int length(); + } /** Sole constructor. */ public ParallelTaxonomyArrays() {} @@ -43,17 +63,17 @@ public abstract class ParallelTaxonomyArrays { * Returns the parents array, where {@code parents[i]} denotes the parent of category ordinal * {@code i}. */ - public abstract int[] parents(); + public abstract IntArray parents(); /** * Returns the children array, where {@code children[i]} denotes a child of category ordinal * {@code i}. */ - public abstract int[] children(); + public abstract IntArray children(); /** * Returns the siblings array, where {@code siblings[i]} denotes the sibling of category ordinal * {@code i}. */ - public abstract int[] siblings(); + public abstract IntArray siblings(); } diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacetLabels.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacetLabels.java index 75198f2ca93..49567e75eb7 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacetLabels.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacetLabels.java @@ -84,7 +84,7 @@ public class TaxonomyFacetLabels { private int currentDocOrdinalCount; // Lazily set when nextFacetLabel(int docId, String facetDimension) is first called - private int[] parents; + private ParallelTaxonomyArrays.IntArray parents; /** Construct from a specified {@link SortedNumericDocValues} field. 
*/ public FacetLabelReader(SortedNumericDocValues ordinalValues) { @@ -141,10 +141,10 @@ public class TaxonomyFacetLabels { private boolean isDescendant(int ord, int ancestorOrd) { while (ord != INVALID_ORDINAL && ord != ROOT_ORDINAL) { - if (parents[ord] == ancestorOrd) { + if (parents.get(ord) == ancestorOrd) { return true; } - ord = parents[ord]; + ord = parents.get(ord); } return false; } diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacets.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacets.java index 97d3179e11e..31f7acf033d 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacets.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacets.java @@ -59,13 +59,13 @@ abstract class TaxonomyFacets extends Facets { final FacetsCollector fc; /** Maps parent ordinal to its child, or -1 if the parent is childless. */ - private int[] children; + private ParallelTaxonomyArrays.IntArray children; /** Maps an ordinal to its sibling, or -1 if there is no sibling. */ - private int[] siblings; + private ParallelTaxonomyArrays.IntArray siblings; /** Maps an ordinal to its parent, or -1 if there is no parent (root node). */ - final int[] parents; + final ParallelTaxonomyArrays.IntArray parents; /** Sole constructor. */ TaxonomyFacets( @@ -82,7 +82,7 @@ abstract class TaxonomyFacets extends Facets { * Returns int[] mapping each ordinal to its first child; this is a large array and is computed * (and then saved) the first time this method is invoked. */ - int[] getChildren() throws IOException { + ParallelTaxonomyArrays.IntArray getChildren() throws IOException { if (children == null) { children = taxoReader.getParallelTaxonomyArrays().children(); } @@ -93,7 +93,7 @@ abstract class TaxonomyFacets extends Facets { * Returns int[] mapping each ordinal to its next sibling; this is a large array and is computed * (and then saved) the first time this method is invoked. 
*/ - int[] getSiblings() throws IOException { + ParallelTaxonomyArrays.IntArray getSiblings() throws IOException { if (siblings == null) { siblings = taxoReader.getParallelTaxonomyArrays().siblings(); } @@ -150,9 +150,9 @@ abstract class TaxonomyFacets extends Facets { return Collections.emptyList(); } - int[] children = getChildren(); - int[] siblings = getSiblings(); - int ord = children[TaxonomyReader.ROOT_ORDINAL]; + ParallelTaxonomyArrays.IntArray children = getChildren(); + ParallelTaxonomyArrays.IntArray siblings = getSiblings(); + int ord = children.get(TaxonomyReader.ROOT_ORDINAL); List results = new ArrayList<>(); while (ord != TaxonomyReader.INVALID_ORDINAL) { String dim = taxoReader.getPath(ord).components[0]; @@ -163,7 +163,7 @@ abstract class TaxonomyFacets extends Facets { results.add(result); } } - ord = siblings[ord]; + ord = siblings.get(ord); } // Sort by highest value, tie break by dim: diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyReader.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyReader.java index cd5729d5109..7ef1c53d9bb 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyReader.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyReader.java @@ -61,10 +61,10 @@ public abstract class TaxonomyReader implements Closeable { /** An iterator over a category's children. 
*/ public static class ChildrenIterator { - private final int[] siblings; + private final ParallelTaxonomyArrays.IntArray siblings; private int child; - ChildrenIterator(int child, int[] siblings) { + ChildrenIterator(int child, ParallelTaxonomyArrays.IntArray siblings) { this.siblings = siblings; this.child = child; } @@ -75,7 +75,7 @@ public abstract class TaxonomyReader implements Closeable { public int next() { int res = child; if (child != TaxonomyReader.INVALID_ORDINAL) { - child = siblings[child]; + child = siblings.get(child); } return res; } @@ -181,7 +181,7 @@ public abstract class TaxonomyReader implements Closeable { /** Returns an iterator over the children of the given ordinal. */ public ChildrenIterator getChildren(final int ordinal) throws IOException { ParallelTaxonomyArrays arrays = getParallelTaxonomyArrays(); - int child = ordinal >= 0 ? arrays.children()[ordinal] : INVALID_ORDINAL; + int child = ordinal >= 0 ? arrays.children().get(ordinal) : INVALID_ORDINAL; return new ChildrenIterator(child, arrays.siblings()); } diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java index 97ec1c57529..86f3d18deed 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java @@ -34,6 +34,7 @@ import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.document.StringField; import org.apache.lucene.facet.FacetsConfig; import org.apache.lucene.facet.taxonomy.FacetLabel; +import org.apache.lucene.facet.taxonomy.ParallelTaxonomyArrays; import org.apache.lucene.facet.taxonomy.TaxonomyReader; import org.apache.lucene.facet.taxonomy.TaxonomyWriter; import org.apache.lucene.facet.taxonomy.writercache.LruTaxonomyWriterCache; @@ -678,10 +679,10 @@ public class 
DirectoryTaxonomyWriter implements TaxonomyWriter { // was allocated bigger than it really needs to be. Objects.checkIndex(ordinal, nextID.get()); - int[] parents = getTaxoArrays().parents(); - assert ordinal < parents.length - : "requested ordinal (" + ordinal + "); parents.length (" + parents.length + ") !"; - return parents[ordinal]; + ParallelTaxonomyArrays.IntArray parents = getTaxoArrays().parents(); + assert ordinal < parents.length() + : "requested ordinal (" + ordinal + "); parents.length (" + parents.length() + ") !"; + return parents.get(ordinal); } /** diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/TaxonomyIndexArrays.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/TaxonomyIndexArrays.java index 7ec905f1d90..d9a844f6c98 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/TaxonomyIndexArrays.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/TaxonomyIndexArrays.java @@ -38,27 +38,53 @@ import org.apache.lucene.util.RamUsageEstimator; * @lucene.experimental */ class TaxonomyIndexArrays extends ParallelTaxonomyArrays implements Accountable { + private static final int CHUNK_SIZE_BITS = 13; + static final int CHUNK_SIZE = 1 << CHUNK_SIZE_BITS; + private static final int CHUNK_MASK = CHUNK_SIZE - 1; - private final int[] parents; + private final ChunkedIntArray parents; // the following two arrays are lazily initialized. note that we only keep a // single boolean member as volatile, instead of declaring the arrays // volatile. the code guarantees that only after the boolean is set to true, // the arrays are returned. private volatile boolean initializedChildren = false; - private int[] children, siblings; + private ChunkedIntArray children, siblings; - /** Used by {@link #add(int, int)} after the array grew. 
*/ - private TaxonomyIndexArrays(int[] parents) { - this.parents = parents; + static class ChunkedIntArray extends ParallelTaxonomyArrays.IntArray { + final int[][] values; + + private ChunkedIntArray(int[][] values) { + this.values = values; + } + + @Override + public int get(int i) { + return values[i >> CHUNK_SIZE_BITS][i & CHUNK_MASK]; + } + + public void set(int i, int val) { + values[i >> CHUNK_SIZE_BITS][i & CHUNK_MASK] = val; + } + + @Override + public int length() { + return ((values.length - 1) << CHUNK_SIZE_BITS) + values[values.length - 1].length; + } + } + + /** Used by {@link #add(int, int)} after the array grew. Also, used for testing. */ + TaxonomyIndexArrays(int[][] parents) { + this.parents = new ChunkedIntArray(parents); } public TaxonomyIndexArrays(IndexReader reader) throws IOException { - parents = new int[reader.maxDoc()]; - if (parents.length > 0) { - initParents(reader, 0); - parents[0] = TaxonomyReader.INVALID_ORDINAL; + int[][] parentArray = allocateChunkedArray(reader.maxDoc(), 0); + if (parentArray.length > 0) { + initParents(parentArray, reader, 0); + parentArray[0][0] = TaxonomyReader.INVALID_ORDINAL; } + parents = new ChunkedIntArray(parentArray); } public TaxonomyIndexArrays(IndexReader reader, TaxonomyIndexArrays copyFrom) throws IOException { @@ -68,25 +94,54 @@ class TaxonomyIndexArrays extends ParallelTaxonomyArrays implements Accountable // it may be caused if e.g. the taxonomy segments were merged, and so an updated // NRT reader was obtained, even though nothing was changed. this is not very likely // to happen. 
- int[] copyParents = copyFrom.parents(); - this.parents = new int[reader.maxDoc()]; - System.arraycopy(copyParents, 0, parents, 0, copyParents.length); - initParents(reader, copyParents.length); - + int[][] parentArray = allocateChunkedArray(reader.maxDoc(), copyFrom.parents.values.length - 1); + if (parentArray.length > 0) { + copyChunkedArray(copyFrom.parents.values, parentArray); + initParents(parentArray, reader, copyFrom.parents.length()); + } + parents = new ChunkedIntArray(parentArray); if (copyFrom.initializedChildren) { initChildrenSiblings(copyFrom); } } + private static int[][] allocateChunkedArray(int size, int startFrom) { + int chunkCount = (size >> CHUNK_SIZE_BITS) + 1; + int[][] array = new int[chunkCount][]; + for (int i = startFrom; i < chunkCount - 1; i++) { + array[i] = new int[CHUNK_SIZE]; + } + array[chunkCount - 1] = new int[size & CHUNK_MASK]; + return array; + } + + private static void copyChunkedArray(int[][] oldArray, int[][] newArray) { + // Copy all but the last (maybe partial) chunk from the old array + if (oldArray.length > 1) { + System.arraycopy(oldArray, 0, newArray, 0, oldArray.length - 1); + } + int[] lastCopyChunk = oldArray[oldArray.length - 1]; + System.arraycopy(lastCopyChunk, 0, newArray[oldArray.length - 1], 0, lastCopyChunk.length); + } + private synchronized void initChildrenSiblings(TaxonomyIndexArrays copyFrom) { if (!initializedChildren) { // must do this check ! 
- children = new int[parents.length]; - siblings = new int[parents.length]; + int startFrom; + if (copyFrom == null) { + startFrom = 0; + } else { + startFrom = copyFrom.parents.values.length - 1; + } + int[][] childrenArray = allocateChunkedArray(parents.length(), startFrom); + int[][] siblingsArray = allocateChunkedArray(parents.length(), startFrom); + // Rely on these arrays being copied by reference, since we may modify them below + children = new ChunkedIntArray(childrenArray); + siblings = new ChunkedIntArray(siblingsArray); if (copyFrom != null) { // called from the ctor, after we know copyFrom has initialized children/siblings - System.arraycopy(copyFrom.children(), 0, children, 0, copyFrom.children().length); - System.arraycopy(copyFrom.siblings(), 0, siblings, 0, copyFrom.siblings().length); - computeChildrenSiblings(copyFrom.parents.length); + copyChunkedArray(copyFrom.children.values, childrenArray); + copyChunkedArray(copyFrom.siblings.values, siblingsArray); + computeChildrenSiblings(copyFrom.parents.length()); } else { computeChildrenSiblings(0); } @@ -98,26 +153,31 @@ class TaxonomyIndexArrays extends ParallelTaxonomyArrays implements Accountable // reset the youngest child of all ordinals. while this should be done only // for the leaves, we don't know up front which are the leaves, so we reset // all of them. - for (int i = first; i < parents.length; i++) { - children[i] = TaxonomyReader.INVALID_ORDINAL; + int length = parents.length(); + for (int i = first; i < length; i++) { + children.set(i, TaxonomyReader.INVALID_ORDINAL); } // the root category has no parent, and therefore no siblings if (first == 0) { first = 1; - siblings[0] = TaxonomyReader.INVALID_ORDINAL; + siblings.set(0, TaxonomyReader.INVALID_ORDINAL); } - for (int i = first; i < parents.length; i++) { + for (int i = first; i < length; i++) { + int parent = parents.get(i); + // The existing youngest child of the parent is the next older sibling of i. 
// note that parents[i] is always < i, so the right-hand-side of // the following line is already set when we get here - siblings[i] = children[parents[i]]; - children[parents[i]] = i; + siblings.set(i, children.get(parent)); + // The new youngest child of the parent is i. + children.set(parent, i); } } // Read the parents of the new categories - private void initParents(IndexReader reader, int first) throws IOException { + private static void initParents(int[][] parentsArray, IndexReader reader, int first) + throws IOException { if (reader.maxDoc() == first) { return; } @@ -141,7 +201,9 @@ class TaxonomyIndexArrays extends ParallelTaxonomyArrays implements Accountable throw new CorruptIndexException( "Missing parent data for category " + (doc + leafContext.docBase), reader.toString()); } - parents[doc + leafContext.docBase] = Math.toIntExact(parentValues.longValue()); + int pos = doc + leafContext.docBase; + parentsArray[pos >> CHUNK_SIZE_BITS][pos & CHUNK_MASK] = + Math.toIntExact(parentValues.longValue()); } } } @@ -153,12 +215,15 @@ class TaxonomyIndexArrays extends ParallelTaxonomyArrays implements Accountable *

NOTE: you should call this method from a thread-safe code. */ TaxonomyIndexArrays add(int ordinal, int parentOrdinal) { - if (ordinal >= parents.length) { - int[] newarray = ArrayUtil.grow(parents, ordinal + 1); - newarray[ordinal] = parentOrdinal; - return new TaxonomyIndexArrays(newarray); + if (ordinal >= parents.length()) { + int[][] newParents = + allocateChunkedArray( + ArrayUtil.oversize(ordinal + 1, Integer.BYTES), parents.values.length - 1); + copyChunkedArray(parents.values, newParents); + newParents[ordinal >> CHUNK_SIZE_BITS][ordinal & CHUNK_MASK] = parentOrdinal; + return new TaxonomyIndexArrays(newParents); } - parents[ordinal] = parentOrdinal; + parents.set(ordinal, parentOrdinal); return this; } @@ -167,7 +232,7 @@ class TaxonomyIndexArrays extends ParallelTaxonomyArrays implements Accountable * {@code i}. */ @Override - public int[] parents() { + public ChunkedIntArray parents() { return parents; } @@ -177,7 +242,7 @@ class TaxonomyIndexArrays extends ParallelTaxonomyArrays implements Accountable * taxonomy as an immediate child of {@code i}. */ @Override - public int[] children() { + public ChunkedIntArray children() { if (!initializedChildren) { initChildrenSiblings(null); } @@ -191,7 +256,7 @@ class TaxonomyIndexArrays extends ParallelTaxonomyArrays implements Accountable * {@code i}. The sibling is defined as the previous youngest child of {@code parents[i]}. 
*/ @Override - public int[] siblings() { + public ChunkedIntArray siblings() { if (!initializedChildren) { initChildrenSiblings(null); } diff --git a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyCombined.java b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyCombined.java index 9ba03c4eb92..138560208ec 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyCombined.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyCombined.java @@ -20,7 +20,8 @@ import java.io.IOException; import java.io.PrintWriter; import java.io.StringWriter; import java.util.ArrayList; -import java.util.Arrays; +import java.util.List; +import java.util.Locale; import java.util.concurrent.atomic.AtomicBoolean; import org.apache.lucene.facet.FacetTestCase; import org.apache.lucene.facet.SlowDirectory; @@ -304,7 +305,7 @@ public class TestTaxonomyCombined extends FacetTestCase { TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir); assertEquals(1, tr.getSize()); assertEquals(0, tr.getPath(0).length); - assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getParallelTaxonomyArrays().parents()[0]); + assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getParallelTaxonomyArrays().parents().get(0)); assertEquals(0, tr.getOrdinal(new FacetLabel())); tr.close(); indexDir.close(); @@ -323,7 +324,7 @@ public class TestTaxonomyCombined extends FacetTestCase { TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir); assertEquals(1, tr.getSize()); assertEquals(0, tr.getPath(0).length); - assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getParallelTaxonomyArrays().parents()[0]); + assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getParallelTaxonomyArrays().parents().get(0)); assertEquals(0, tr.getOrdinal(new FacetLabel())); tw.close(); tr.close(); @@ -412,13 +413,13 @@ public class TestTaxonomyCombined extends FacetTestCase { TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir); // check that the parent of the root 
ordinal is the invalid ordinal: - int[] parents = tr.getParallelTaxonomyArrays().parents(); - assertEquals(TaxonomyReader.INVALID_ORDINAL, parents[0]); + ParallelTaxonomyArrays.IntArray parents = tr.getParallelTaxonomyArrays().parents(); + assertEquals(TaxonomyReader.INVALID_ORDINAL, parents.get(0)); // check parent of non-root ordinals: for (int ordinal = 1; ordinal < tr.getSize(); ordinal++) { FacetLabel me = tr.getPath(ordinal); - int parentOrdinal = parents[ordinal]; + int parentOrdinal = parents.get(ordinal); FacetLabel parent = tr.getPath(parentOrdinal); if (parent == null) { fail( @@ -552,10 +553,10 @@ public class TestTaxonomyCombined extends FacetTestCase { tw.close(); TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir); ParallelTaxonomyArrays ca = tr.getParallelTaxonomyArrays(); - int[] youngestChildArray = ca.children(); - assertEquals(tr.getSize(), youngestChildArray.length); - int[] olderSiblingArray = ca.siblings(); - assertEquals(tr.getSize(), olderSiblingArray.length); + ParallelTaxonomyArrays.IntArray youngestChildArray = ca.children(); + assertEquals(tr.getSize(), youngestChildArray.length()); + ParallelTaxonomyArrays.IntArray olderSiblingArray = ca.siblings(); + assertEquals(tr.getSize(), olderSiblingArray.length()); for (int i = 0; i < expectedCategories.length; i++) { // find expected children by looking at all expectedCategories // for children @@ -578,12 +579,12 @@ public class TestTaxonomyCombined extends FacetTestCase { // check that children and expectedChildren are the same, with the // correct reverse (youngest to oldest) order: if (expectedChildren.size() == 0) { - assertEquals(TaxonomyReader.INVALID_ORDINAL, youngestChildArray[i]); + assertEquals(TaxonomyReader.INVALID_ORDINAL, youngestChildArray.get(i)); } else { - int child = youngestChildArray[i]; + int child = youngestChildArray.get(i); assertEquals(expectedChildren.get(0).intValue(), child); for (int j = 1; j < expectedChildren.size(); j++) { - child = 
olderSiblingArray[child]; + child = olderSiblingArray.get(child); assertEquals(expectedChildren.get(j).intValue(), child); // if child is INVALID_ORDINAL we should stop, but // assertEquals would fail in this case anyway. @@ -591,7 +592,7 @@ public class TestTaxonomyCombined extends FacetTestCase { // When we're done comparing, olderSiblingArray should now point // to INVALID_ORDINAL, saying there are no more children. If it // doesn't, we found too many children... - assertEquals(-1, olderSiblingArray[child]); + assertEquals(-1, olderSiblingArray.get(child)); } } tr.close(); @@ -613,34 +614,34 @@ public class TestTaxonomyCombined extends FacetTestCase { tw.close(); TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir); ParallelTaxonomyArrays ca = tr.getParallelTaxonomyArrays(); - int[] children = ca.children(); - assertEquals(tr.getSize(), children.length); - int[] olderSiblingArray = ca.siblings(); - assertEquals(tr.getSize(), olderSiblingArray.length); + ParallelTaxonomyArrays.IntArray children = ca.children(); + assertEquals(tr.getSize(), children.length()); + ParallelTaxonomyArrays.IntArray olderSiblingArray = ca.siblings(); + assertEquals(tr.getSize(), olderSiblingArray.length()); // test that the "youngest child" of every category is indeed a child: - int[] parents = tr.getParallelTaxonomyArrays().parents(); + ParallelTaxonomyArrays.IntArray parents = tr.getParallelTaxonomyArrays().parents(); for (int i = 0; i < tr.getSize(); i++) { - int youngestChild = children[i]; + int youngestChild = children.get(i); if (youngestChild != TaxonomyReader.INVALID_ORDINAL) { - assertEquals(i, parents[youngestChild]); + assertEquals(i, parents.get(youngestChild)); } } // test that the "older sibling" of every category is indeed older (lower) // (it can also be INVALID_ORDINAL, which is lower than any ordinal) for (int i = 0; i < tr.getSize(); i++) { - assertTrue("olderSiblingArray[" + i + "] should be <" + i, olderSiblingArray[i] < i); + assertTrue("olderSiblingArray[" + 
i + "] should be <" + i, olderSiblingArray.get(i) < i); } // test that the "older sibling" of every category is indeed a sibling // (they share the same parent) for (int i = 0; i < tr.getSize(); i++) { - int sibling = olderSiblingArray[i]; + int sibling = olderSiblingArray.get(i); if (sibling == TaxonomyReader.INVALID_ORDINAL) { continue; } - assertEquals(parents[i], parents[sibling]); + assertEquals(parents.get(i), parents.get(sibling)); } // And now for slightly more complex (and less "invariant-like"...) @@ -652,14 +653,14 @@ public class TestTaxonomyCombined extends FacetTestCase { // Find the really youngest child: int j; for (j = tr.getSize() - 1; j > i; j--) { - if (parents[j] == i) { + if (parents.get(j) == i) { break; // found youngest child } } if (j == i) { // no child found j = TaxonomyReader.INVALID_ORDINAL; } - assertEquals(j, children[i]); + assertEquals(j, children.get(i)); } // test that the "older sibling" is indeed the least oldest one - and @@ -669,20 +670,26 @@ public class TestTaxonomyCombined extends FacetTestCase { // Find the youngest older sibling: int j; for (j = i - 1; j >= 0; j--) { - if (parents[j] == parents[i]) { + if (parents.get(j) == parents.get(i)) { break; // found youngest older sibling } } if (j < 0) { // no sibling found j = TaxonomyReader.INVALID_ORDINAL; } - assertEquals(j, olderSiblingArray[i]); + assertEquals(j, olderSiblingArray.get(i)); } tr.close(); indexDir.close(); } + private static void assertArrayEquals(int[] expected, ParallelTaxonomyArrays.IntArray actual) { + for (int i = 0; i < expected.length; i++) { + assertEquals(expected[i], actual.get(i)); + } + } + /** Test how getChildrenArrays() deals with the taxonomy's growth: */ @Test public void testChildrenArraysGrowth() throws Exception { @@ -693,10 +700,10 @@ public class TestTaxonomyCombined extends FacetTestCase { TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir); ParallelTaxonomyArrays ca = tr.getParallelTaxonomyArrays(); assertEquals(3, 
tr.getSize()); - assertEquals(3, ca.siblings().length); - assertEquals(3, ca.children().length); - assertTrue(Arrays.equals(new int[] {1, 2, -1}, ca.children())); - assertTrue(Arrays.equals(new int[] {-1, -1, -1}, ca.siblings())); + assertEquals(3, ca.siblings().length()); + assertEquals(3, ca.children().length()); + assertArrayEquals(new int[] {1, 2, -1}, ca.children()); + assertArrayEquals(new int[] {-1, -1, -1}, ca.siblings()); tw.addCategory(new FacetLabel("hi", "ho")); tw.addCategory(new FacetLabel("hello")); tw.commit(); @@ -704,8 +711,8 @@ public class TestTaxonomyCombined extends FacetTestCase { ParallelTaxonomyArrays newca = tr.getParallelTaxonomyArrays(); assertSame(newca, ca); // we got exactly the same object assertEquals(3, tr.getSize()); - assertEquals(3, ca.siblings().length); - assertEquals(3, ca.children().length); + assertEquals(3, ca.siblings().length()); + assertEquals(3, ca.children().length()); // After the refresh, things change: TaxonomyReader newtr = TaxonomyReader.openIfChanged(tr); assertNotNull(newtr); @@ -713,10 +720,10 @@ public class TestTaxonomyCombined extends FacetTestCase { tr = newtr; ca = tr.getParallelTaxonomyArrays(); assertEquals(5, tr.getSize()); - assertEquals(5, ca.siblings().length); - assertEquals(5, ca.children().length); - assertTrue(Arrays.equals(new int[] {4, 3, -1, -1, -1}, ca.children())); - assertTrue(Arrays.equals(new int[] {-1, -1, -1, 2, 1}, ca.siblings())); + assertEquals(5, ca.siblings().length()); + assertEquals(5, ca.children().length()); + assertArrayEquals(new int[] {4, 3, -1, -1, -1}, ca.children()); + assertArrayEquals(new int[] {-1, -1, -1, 2, 1}, ca.siblings()); tw.close(); tr.close(); indexDir.close(); @@ -737,7 +744,7 @@ public class TestTaxonomyCombined extends FacetTestCase { final ParallelTaxonomyArrays ca1 = trBase.getParallelTaxonomyArrays(); final int abOrd = trBase.getOrdinal(abPath); - final int abYoungChildBase1 = ca1.children()[abOrd]; + final int abYoungChildBase1 = 
ca1.children().get(abOrd); final int numCategories = atLeast(200); for (int i = 0; i < numCategories; i++) { @@ -751,7 +758,7 @@ public class TestTaxonomyCombined extends FacetTestCase { trBase = newTaxoReader; final ParallelTaxonomyArrays ca2 = trBase.getParallelTaxonomyArrays(); - final int abYoungChildBase2 = ca2.children()[abOrd]; + final int abYoungChildBase2 = ca2.children().get(abOrd); int numRetries = atLeast(10); for (int retry = 0; retry < numRetries; retry++) { @@ -799,7 +806,7 @@ public class TestTaxonomyCombined extends FacetTestCase { setPriority(1 + getPriority()); try { while (!stop.get()) { - int lastOrd = tr.getParallelTaxonomyArrays().parents().length - 1; + int lastOrd = tr.getParallelTaxonomyArrays().parents().length() - 1; assertNotNull( "path of last-ord " + lastOrd + " is not found!", tr.getPath(lastOrd)); assertChildrenArrays(tr.getParallelTaxonomyArrays(), retry, retrieval[0]++); @@ -812,7 +819,7 @@ public class TestTaxonomyCombined extends FacetTestCase { } private void assertChildrenArrays(ParallelTaxonomyArrays ca, int retry, int retrieval) { - final int abYoungChild = ca.children()[abOrd]; + final int abYoungChild = ca.children().get(abOrd); assertTrue( "Retry " + retry @@ -828,7 +835,7 @@ public class TestTaxonomyCombined extends FacetTestCase { + abYoungChildBase2 + " but was: " + abYoungChild, - abYoungChildBase1 == abYoungChild || abYoungChildBase2 == ca.children()[abOrd]); + abYoungChildBase1 == abYoungChild || abYoungChildBase2 == ca.children().get(abOrd)); } }; thread.start(); @@ -903,7 +910,8 @@ public class TestTaxonomyCombined extends FacetTestCase { int author = 1; try { - assertEquals(TaxonomyReader.ROOT_ORDINAL, tr.getParallelTaxonomyArrays().parents()[author]); + assertEquals( + TaxonomyReader.ROOT_ORDINAL, tr.getParallelTaxonomyArrays().parents().get(author)); // ok } catch ( @SuppressWarnings("unused") @@ -926,10 +934,10 @@ public class TestTaxonomyCombined extends FacetTestCase { assertNotNull(newTaxoReader); 
tr.close(); tr = newTaxoReader; - int[] parents = tr.getParallelTaxonomyArrays().parents(); - assertEquals(author, parents[dawkins]); - assertEquals(TaxonomyReader.ROOT_ORDINAL, parents[author]); - assertEquals(TaxonomyReader.INVALID_ORDINAL, parents[TaxonomyReader.ROOT_ORDINAL]); + ParallelTaxonomyArrays.IntArray parents = tr.getParallelTaxonomyArrays().parents(); + assertEquals(author, parents.get(dawkins)); + assertEquals(TaxonomyReader.ROOT_ORDINAL, parents.get(author)); + assertEquals(TaxonomyReader.INVALID_ORDINAL, parents.get(TaxonomyReader.ROOT_ORDINAL)); assertEquals(3, tr.getSize()); tw.close(); tr.close(); @@ -1097,6 +1105,64 @@ public class TestTaxonomyCombined extends FacetTestCase { dir.close(); } + private static String[][] manyCategories(int count, int roundSize) { + String[][] result = new String[count / roundSize + 1][]; + int k = 0; + do { + k += roundSize; + List round = new ArrayList<>(); + for (int i = k - roundSize + 1; i <= k && i < count; i++) { + round.add(String.format(Locale.ROOT, "category %d of %d", i, k)); + } + result[k / roundSize - 1] = round.toArray(new String[0]); + } while (k <= count); + return result; + } + + public void testThousandsOfCategories() throws IOException { + int roundSize = random().nextInt(2, 4); + int size = random().nextInt(16384, 32768); + Directory indexDir = newDirectory(); + TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir); + String[][] manyCategories = manyCategories(size, roundSize); + for (String[] elem : manyCategories) { + if (elem == null) { + throw new IllegalStateException( + "Got null array with size = " + size + " and roundSize = " + roundSize); + } else if (elem.length > 0) { + tw.addCategory(new FacetLabel(elem)); + } + } + tw.close(); + TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir); + ParallelTaxonomyArrays ca = tr.getParallelTaxonomyArrays(); + ParallelTaxonomyArrays.IntArray parents = ca.parents(); + ParallelTaxonomyArrays.IntArray children = ca.children(); + 
assertEquals(size, parents.length()); + assertEquals(size, children.length()); + for (int j = 1; j < size - roundSize; j += roundSize) { + // Top level categories all have root as their parent. + assertEquals(0, parents.get(j)); + for (int i = j; i < j + roundSize - 1; i++) { + // Children extend in a chain from the top level category. + // The parent/child relationships are symmetric. + assertEquals(i + 1, children.get(i)); + if (i > j) { + assertEquals(i - 1, parents.get(i)); + } + } + } + ParallelTaxonomyArrays.IntArray siblings = ca.siblings(); + assertEquals(size, siblings.length()); + for (int i = 1; i < size - roundSize; i += roundSize) { + // Each top-level category (after the first) has the previous top-level category as their + // older sibling. + assertEquals(i, siblings.get(i + roundSize)); + } + tr.close(); + indexDir.close(); + } + // TODO (Facet): test multiple readers, one writer. Have the multiple readers // using the same object (simulating threads) or different objects // (simulating processes). 
diff --git a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestConcurrentFacetedIndexing.java b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestConcurrentFacetedIndexing.java index e3aab92be83..7efb9bd6ce3 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestConcurrentFacetedIndexing.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestConcurrentFacetedIndexing.java @@ -25,6 +25,7 @@ import org.apache.lucene.facet.FacetField; import org.apache.lucene.facet.FacetTestCase; import org.apache.lucene.facet.FacetsConfig; import org.apache.lucene.facet.taxonomy.FacetLabel; +import org.apache.lucene.facet.taxonomy.ParallelTaxonomyArrays; import org.apache.lucene.facet.taxonomy.writercache.LruTaxonomyWriterCache; import org.apache.lucene.facet.taxonomy.writercache.TaxonomyWriterCache; import org.apache.lucene.index.IndexWriter; @@ -153,7 +154,7 @@ public class TestConcurrentFacetedIndexing extends FacetTestCase { } fail("mismatch number of categories"); } - int[] parents = tr.getParallelTaxonomyArrays().parents(); + ParallelTaxonomyArrays.IntArray parents = tr.getParallelTaxonomyArrays().parents(); for (String cat : values.keySet()) { FacetLabel cp = new FacetLabel(FacetsConfig.stringToPath(cat)); assertTrue("category not found " + cp, tr.getOrdinal(cp) > 0); @@ -163,7 +164,7 @@ public class TestConcurrentFacetedIndexing extends FacetTestCase { for (int i = 0; i < level; i++) { path = cp.subpath(i + 1); int ord = tr.getOrdinal(path); - assertEquals("invalid parent for cp=" + path, parentOrd, parents[ord]); + assertEquals("invalid parent for cp=" + path, parentOrd, parents.get(ord)); parentOrd = ord; // next level should have this parent } } diff --git a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java index 246edff6641..01fe8cee262 
100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java @@ -31,6 +31,7 @@ import java.util.Set; import java.util.stream.IntStream; import org.apache.lucene.facet.FacetTestCase; import org.apache.lucene.facet.taxonomy.FacetLabel; +import org.apache.lucene.facet.taxonomy.ParallelTaxonomyArrays; import org.apache.lucene.facet.taxonomy.TaxonomyReader; import org.apache.lucene.facet.taxonomy.TaxonomyReader.ChildrenIterator; import org.apache.lucene.facet.taxonomy.TaxonomyWriter; @@ -248,11 +249,11 @@ public class TestDirectoryTaxonomyReader extends FacetTestCase { // assert categories assertEquals(numCategories, reader.getSize()); int roundOrdinal = reader.getOrdinal(new FacetLabel(Integer.toString(i))); - int[] parents = reader.getParallelTaxonomyArrays().parents(); - assertEquals(0, parents[roundOrdinal]); // round's parent is root + ParallelTaxonomyArrays.IntArray parents = reader.getParallelTaxonomyArrays().parents(); + assertEquals(0, parents.get(roundOrdinal)); // round's parent is root for (int j = 0; j < numCats; j++) { int ord = reader.getOrdinal(new FacetLabel(Integer.toString(i), Integer.toString(j))); - assertEquals(roundOrdinal, parents[ord]); // round's parent is root + assertEquals(roundOrdinal, parents.get(ord)); // category's parent is the round ordinal } } @@ -286,7 +287,7 @@ public class TestDirectoryTaxonomyReader extends FacetTestCase { TaxonomyReader reader = new DirectoryTaxonomyReader(writer); assertEquals(1, reader.getSize()); - assertEquals(1, reader.getParallelTaxonomyArrays().parents().length); + assertEquals(1, reader.getParallelTaxonomyArrays().parents().length()); // add category and call forceMerge -- this should flush IW and merge segments down to 1 // in ParentArray.initFromReader, this used to fail assuming there are no parents. 
@@ -299,7 +300,7 @@ public class TestDirectoryTaxonomyReader extends FacetTestCase { reader.close(); reader = newtr; assertEquals(2, reader.getSize()); - assertEquals(2, reader.getParallelTaxonomyArrays().parents().length); + assertEquals(2, reader.getParallelTaxonomyArrays().parents().length()); reader.close(); writer.close(); @@ -336,7 +337,7 @@ public class TestDirectoryTaxonomyReader extends FacetTestCase { TaxonomyReader reader = new DirectoryTaxonomyReader(writer); assertEquals(2, reader.getSize()); - assertEquals(2, reader.getParallelTaxonomyArrays().parents().length); + assertEquals(2, reader.getParallelTaxonomyArrays().parents().length()); // merge all the segments so that NRT reader thinks there's a change iw.forceMerge(1); @@ -347,7 +348,7 @@ public class TestDirectoryTaxonomyReader extends FacetTestCase { reader.close(); reader = newtr; assertEquals(2, reader.getSize()); - assertEquals(2, reader.getParallelTaxonomyArrays().parents().length); + assertEquals(2, reader.getParallelTaxonomyArrays().parents().length()); reader.close(); writer.close(); diff --git a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java index eb098f08524..dd55e55a22d 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java @@ -29,6 +29,7 @@ import org.apache.lucene.facet.FacetField; import org.apache.lucene.facet.FacetTestCase; import org.apache.lucene.facet.FacetsConfig; import org.apache.lucene.facet.taxonomy.FacetLabel; +import org.apache.lucene.facet.taxonomy.ParallelTaxonomyArrays; import org.apache.lucene.facet.taxonomy.TaxonomyReader; import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.MemoryOrdinalMap; import 
org.apache.lucene.facet.taxonomy.writercache.LruTaxonomyWriterCache; @@ -366,7 +367,7 @@ public class TestDirectoryTaxonomyWriter extends FacetTestCase { fail("mismatch number of categories"); } - int[] parents = dtr.getParallelTaxonomyArrays().parents(); + ParallelTaxonomyArrays.IntArray parents = dtr.getParallelTaxonomyArrays().parents(); for (String cat : values.keySet()) { FacetLabel cp = new FacetLabel(FacetsConfig.stringToPath(cat)); assertTrue("category not found " + cp, dtr.getOrdinal(cp) > 0); @@ -376,7 +377,7 @@ public class TestDirectoryTaxonomyWriter extends FacetTestCase { for (int i = 0; i < level; i++) { path = cp.subpath(i + 1); int ord = dtr.getOrdinal(path); - assertEquals("invalid parent for cp=" + path, parentOrd, parents[ord]); + assertEquals("invalid parent for cp=" + path, parentOrd, parents.get(ord)); parentOrd = ord; // next level should have this parent } } diff --git a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestTaxonomyIndexArrays.java b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestTaxonomyIndexArrays.java new file mode 100644 index 00000000000..acf2776b663 --- /dev/null +++ b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestTaxonomyIndexArrays.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.lucene.facet.taxonomy.directory; + +import org.apache.lucene.facet.taxonomy.TaxonomyReader; +import org.apache.lucene.tests.util.LuceneTestCase; + +public class TestTaxonomyIndexArrays extends LuceneTestCase { + + private void checkInvariants(TaxonomyIndexArrays oldArray, TaxonomyIndexArrays newArray) { + TaxonomyIndexArrays.ChunkedIntArray oldParents = oldArray.parents(); + TaxonomyIndexArrays.ChunkedIntArray newParents = newArray.parents(); + for (int i = 0; i < oldParents.values.length - 1; i++) { + assertSame(oldParents.values[i], newParents.values[i]); + } + int lastOldChunk = oldParents.values.length - 1; + for (int i = 0; i < oldParents.values[lastOldChunk].length; i++) { + assertEquals(oldParents.values[lastOldChunk][i], newParents.values[lastOldChunk][i]); + } + } + + public void testRandom() { + TaxonomyIndexArrays oldArray = + new TaxonomyIndexArrays(new int[][] {new int[] {TaxonomyReader.INVALID_ORDINAL}}); + int numIterations = 100; + int ordinal = 1; + for (int i = 0; i < numIterations; i++) { + int newOrdinal = ordinal + random().nextInt(TaxonomyIndexArrays.CHUNK_SIZE); + TaxonomyIndexArrays newArray = oldArray.add(newOrdinal, ordinal); + checkInvariants(oldArray, newArray); + ordinal = newOrdinal; + } + } + + public void testMultiplesOfChunkSize() { + TaxonomyIndexArrays oldArray = + new TaxonomyIndexArrays(new int[][] {new int[] {TaxonomyReader.INVALID_ORDINAL}}); + int numIterations = 20; + int ordinal = TaxonomyIndexArrays.CHUNK_SIZE; + for (int i = 0; i < numIterations; i++) { + int newOrdinal = ordinal + TaxonomyIndexArrays.CHUNK_SIZE; + TaxonomyIndexArrays newArray = oldArray.add(newOrdinal, ordinal); + checkInvariants(oldArray, newArray); + ordinal = newOrdinal; + } + } +}