mirror of https://github.com/apache/lucene.git
Split taxonomy arrays across chunks (#12995)
Split taxonomy arrays across chunks. Taxonomy ordinals are added in an append-only way. Instead of reallocating a single big array when loading new taxonomy ordinals and individually copying over all the values from the previous arrays, we can keep blocks of ordinals and reuse blocks from the previous arrays.
This commit is contained in:
parent
24d557a4f6
commit
2a0b7f2056
|
@ -214,6 +214,8 @@ Optimizations
|
||||||
|
|
||||||
* GITHUB#12997: Avoid resetting BlockDocsEnum#freqBuffer when indexHasFreq is false. (Zhang Chao, Adrien Grand)
|
* GITHUB#12997: Avoid resetting BlockDocsEnum#freqBuffer when indexHasFreq is false. (Zhang Chao, Adrien Grand)
|
||||||
|
|
||||||
|
* GITHUB#12989: Split taxonomy facet arrays across reusable chunks of elements to reduce allocations. (Michael Froh, Stefan Vodita)
|
||||||
|
|
||||||
Bug Fixes
|
Bug Fixes
|
||||||
---------------------
|
---------------------
|
||||||
* GITHUB#12866: Prevent extra similarity computation for single-level HNSW graphs. (Kaival Parikh)
|
* GITHUB#12866: Prevent extra similarity computation for single-level HNSW graphs. (Kaival Parikh)
|
||||||
|
|
|
@ -73,7 +73,7 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Rollup any necessary dims:
|
// Rollup any necessary dims:
|
||||||
int[] children = getChildren();
|
ParallelTaxonomyArrays.IntArray children = getChildren();
|
||||||
for (Map.Entry<String, DimConfig> ent : config.getDimConfigs().entrySet()) {
|
for (Map.Entry<String, DimConfig> ent : config.getDimConfigs().entrySet()) {
|
||||||
String dim = ent.getKey();
|
String dim = ent.getKey();
|
||||||
DimConfig ft = ent.getValue();
|
DimConfig ft = ent.getValue();
|
||||||
|
@ -81,21 +81,21 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets {
|
||||||
int dimRootOrd = taxoReader.getOrdinal(new FacetLabel(dim));
|
int dimRootOrd = taxoReader.getOrdinal(new FacetLabel(dim));
|
||||||
assert dimRootOrd > 0;
|
assert dimRootOrd > 0;
|
||||||
float newValue =
|
float newValue =
|
||||||
aggregationFunction.aggregate(values[dimRootOrd], rollup(children[dimRootOrd]));
|
aggregationFunction.aggregate(values[dimRootOrd], rollup(children.get(dimRootOrd)));
|
||||||
values[dimRootOrd] = newValue;
|
values[dimRootOrd] = newValue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private float rollup(int ord) throws IOException {
|
private float rollup(int ord) throws IOException {
|
||||||
int[] children = getChildren();
|
ParallelTaxonomyArrays.IntArray children = getChildren();
|
||||||
int[] siblings = getSiblings();
|
ParallelTaxonomyArrays.IntArray siblings = getSiblings();
|
||||||
float aggregationValue = 0f;
|
float aggregationValue = 0f;
|
||||||
while (ord != TaxonomyReader.INVALID_ORDINAL) {
|
while (ord != TaxonomyReader.INVALID_ORDINAL) {
|
||||||
float childValue = aggregationFunction.aggregate(values[ord], rollup(children[ord]));
|
float childValue = aggregationFunction.aggregate(values[ord], rollup(children.get(ord)));
|
||||||
values[ord] = childValue;
|
values[ord] = childValue;
|
||||||
aggregationValue = aggregationFunction.aggregate(aggregationValue, childValue);
|
aggregationValue = aggregationFunction.aggregate(aggregationValue, childValue);
|
||||||
ord = siblings[ord];
|
ord = siblings.get(ord);
|
||||||
}
|
}
|
||||||
return aggregationValue;
|
return aggregationValue;
|
||||||
}
|
}
|
||||||
|
@ -133,10 +133,10 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
int[] children = getChildren();
|
ParallelTaxonomyArrays.IntArray children = getChildren();
|
||||||
int[] siblings = getSiblings();
|
ParallelTaxonomyArrays.IntArray siblings = getSiblings();
|
||||||
|
|
||||||
int ord = children[dimOrd];
|
int ord = children.get(dimOrd);
|
||||||
float aggregatedValue = 0;
|
float aggregatedValue = 0;
|
||||||
|
|
||||||
IntArrayList ordinals = new IntArrayList();
|
IntArrayList ordinals = new IntArrayList();
|
||||||
|
@ -148,7 +148,7 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets {
|
||||||
ordinals.add(ord);
|
ordinals.add(ord);
|
||||||
ordValues.add(values[ord]);
|
ordValues.add(values[ord]);
|
||||||
}
|
}
|
||||||
ord = siblings[ord];
|
ord = siblings.get(ord);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (aggregatedValue == 0) {
|
if (aggregatedValue == 0) {
|
||||||
|
@ -206,10 +206,10 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets {
|
||||||
float bottomValue = 0;
|
float bottomValue = 0;
|
||||||
int bottomOrd = Integer.MAX_VALUE;
|
int bottomOrd = Integer.MAX_VALUE;
|
||||||
|
|
||||||
int[] children = getChildren();
|
ParallelTaxonomyArrays.IntArray children = getChildren();
|
||||||
int[] siblings = getSiblings();
|
ParallelTaxonomyArrays.IntArray siblings = getSiblings();
|
||||||
|
|
||||||
int ord = children[pathOrd];
|
int ord = children.get(pathOrd);
|
||||||
float aggregatedValue = 0;
|
float aggregatedValue = 0;
|
||||||
int childCount = 0;
|
int childCount = 0;
|
||||||
|
|
||||||
|
@ -233,7 +233,7 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ord = siblings[ord];
|
ord = siblings.get(ord);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (dimConfig.multiValued) {
|
if (dimConfig.multiValued) {
|
||||||
|
@ -294,8 +294,8 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets {
|
||||||
}
|
}
|
||||||
|
|
||||||
// get existing children and siblings ordinal array from TaxonomyFacets
|
// get existing children and siblings ordinal array from TaxonomyFacets
|
||||||
int[] children = getChildren();
|
ParallelTaxonomyArrays.IntArray children = getChildren();
|
||||||
int[] siblings = getSiblings();
|
ParallelTaxonomyArrays.IntArray siblings = getSiblings();
|
||||||
|
|
||||||
// Create priority queue to store top dimensions and sort by their aggregated values/hits and
|
// Create priority queue to store top dimensions and sort by their aggregated values/hits and
|
||||||
// string values.
|
// string values.
|
||||||
|
@ -317,7 +317,7 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets {
|
||||||
Map<String, TopChildrenForPath> intermediateResults = null;
|
Map<String, TopChildrenForPath> intermediateResults = null;
|
||||||
|
|
||||||
// iterate over children and siblings ordinals for all dims
|
// iterate over children and siblings ordinals for all dims
|
||||||
int ord = children[TaxonomyReader.ROOT_ORDINAL];
|
int ord = children.get(TaxonomyReader.ROOT_ORDINAL);
|
||||||
while (ord != TaxonomyReader.INVALID_ORDINAL) {
|
while (ord != TaxonomyReader.INVALID_ORDINAL) {
|
||||||
String dim = taxoReader.getPath(ord).components[0];
|
String dim = taxoReader.getPath(ord).components[0];
|
||||||
FacetsConfig.DimConfig dimConfig = config.getDimConfig(dim);
|
FacetsConfig.DimConfig dimConfig = config.getDimConfig(dim);
|
||||||
|
@ -364,7 +364,7 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ord = siblings[ord];
|
ord = siblings.get(ord);
|
||||||
}
|
}
|
||||||
|
|
||||||
FacetResult[] results = new FacetResult[pq.size()];
|
FacetResult[] results = new FacetResult[pq.size()];
|
||||||
|
|
|
@ -104,7 +104,7 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Rollup any necessary dims:
|
// Rollup any necessary dims:
|
||||||
int[] children = null;
|
ParallelTaxonomyArrays.IntArray children = null;
|
||||||
for (Map.Entry<String, DimConfig> ent : config.getDimConfigs().entrySet()) {
|
for (Map.Entry<String, DimConfig> ent : config.getDimConfigs().entrySet()) {
|
||||||
String dim = ent.getKey();
|
String dim = ent.getKey();
|
||||||
DimConfig ft = ent.getValue();
|
DimConfig ft = ent.getValue();
|
||||||
|
@ -118,7 +118,8 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
|
||||||
children = getChildren();
|
children = getChildren();
|
||||||
}
|
}
|
||||||
int currentValue = getValue(dimRootOrd);
|
int currentValue = getValue(dimRootOrd);
|
||||||
int newValue = aggregationFunction.aggregate(currentValue, rollup(children[dimRootOrd]));
|
int newValue =
|
||||||
|
aggregationFunction.aggregate(currentValue, rollup(children.get(dimRootOrd)));
|
||||||
setValue(dimRootOrd, newValue);
|
setValue(dimRootOrd, newValue);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -126,15 +127,15 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
|
||||||
}
|
}
|
||||||
|
|
||||||
private int rollup(int ord) throws IOException {
|
private int rollup(int ord) throws IOException {
|
||||||
int[] children = getChildren();
|
ParallelTaxonomyArrays.IntArray children = getChildren();
|
||||||
int[] siblings = getSiblings();
|
ParallelTaxonomyArrays.IntArray siblings = getSiblings();
|
||||||
int aggregatedValue = 0;
|
int aggregatedValue = 0;
|
||||||
while (ord != TaxonomyReader.INVALID_ORDINAL) {
|
while (ord != TaxonomyReader.INVALID_ORDINAL) {
|
||||||
int currentValue = getValue(ord);
|
int currentValue = getValue(ord);
|
||||||
int newValue = aggregationFunction.aggregate(currentValue, rollup(children[ord]));
|
int newValue = aggregationFunction.aggregate(currentValue, rollup(children.get(ord)));
|
||||||
setValue(ord, newValue);
|
setValue(ord, newValue);
|
||||||
aggregatedValue = aggregationFunction.aggregate(aggregatedValue, getValue(ord));
|
aggregatedValue = aggregationFunction.aggregate(aggregatedValue, getValue(ord));
|
||||||
ord = siblings[ord];
|
ord = siblings.get(ord);
|
||||||
}
|
}
|
||||||
return aggregatedValue;
|
return aggregatedValue;
|
||||||
}
|
}
|
||||||
|
@ -204,16 +205,16 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
|
||||||
for (IntIntCursor c : sparseValues) {
|
for (IntIntCursor c : sparseValues) {
|
||||||
int value = c.value;
|
int value = c.value;
|
||||||
int ord = c.key;
|
int ord = c.key;
|
||||||
if (parents[ord] == dimOrd && value > 0) {
|
if (parents.get(ord) == dimOrd && value > 0) {
|
||||||
aggregatedValue = aggregationFunction.aggregate(aggregatedValue, value);
|
aggregatedValue = aggregationFunction.aggregate(aggregatedValue, value);
|
||||||
ordinals.add(ord);
|
ordinals.add(ord);
|
||||||
ordValues.add(value);
|
ordValues.add(value);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
int[] children = getChildren();
|
ParallelTaxonomyArrays.IntArray children = getChildren();
|
||||||
int[] siblings = getSiblings();
|
ParallelTaxonomyArrays.IntArray siblings = getSiblings();
|
||||||
int ord = children[dimOrd];
|
int ord = children.get(dimOrd);
|
||||||
while (ord != TaxonomyReader.INVALID_ORDINAL) {
|
while (ord != TaxonomyReader.INVALID_ORDINAL) {
|
||||||
int value = values[ord];
|
int value = values[ord];
|
||||||
if (value > 0) {
|
if (value > 0) {
|
||||||
|
@ -221,7 +222,7 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
|
||||||
ordinals.add(ord);
|
ordinals.add(ord);
|
||||||
ordValues.add(value);
|
ordValues.add(value);
|
||||||
}
|
}
|
||||||
ord = siblings[ord];
|
ord = siblings.get(ord);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -289,7 +290,7 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
|
||||||
for (IntIntCursor c : sparseValues) {
|
for (IntIntCursor c : sparseValues) {
|
||||||
int value = c.value;
|
int value = c.value;
|
||||||
int ord = c.key;
|
int ord = c.key;
|
||||||
if (parents[ord] == pathOrd && value > 0) {
|
if (parents.get(ord) == pathOrd && value > 0) {
|
||||||
aggregatedValue = aggregationFunction.aggregate(aggregatedValue, value);
|
aggregatedValue = aggregationFunction.aggregate(aggregatedValue, value);
|
||||||
childCount++;
|
childCount++;
|
||||||
if (value > bottomValue || (value == bottomValue && ord < bottomOrd)) {
|
if (value > bottomValue || (value == bottomValue && ord < bottomOrd)) {
|
||||||
|
@ -307,9 +308,9 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
int[] children = getChildren();
|
ParallelTaxonomyArrays.IntArray children = getChildren();
|
||||||
int[] siblings = getSiblings();
|
ParallelTaxonomyArrays.IntArray siblings = getSiblings();
|
||||||
int ord = children[pathOrd];
|
int ord = children.get(pathOrd);
|
||||||
while (ord != TaxonomyReader.INVALID_ORDINAL) {
|
while (ord != TaxonomyReader.INVALID_ORDINAL) {
|
||||||
int value = values[ord];
|
int value = values[ord];
|
||||||
if (value > 0) {
|
if (value > 0) {
|
||||||
|
@ -328,7 +329,7 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ord = siblings[ord];
|
ord = siblings.get(ord);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -355,8 +356,8 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
|
||||||
}
|
}
|
||||||
|
|
||||||
// get children and siblings ordinal array from TaxonomyFacets
|
// get children and siblings ordinal array from TaxonomyFacets
|
||||||
int[] children = getChildren();
|
ParallelTaxonomyArrays.IntArray children = getChildren();
|
||||||
int[] siblings = getSiblings();
|
ParallelTaxonomyArrays.IntArray siblings = getSiblings();
|
||||||
|
|
||||||
// Create priority queue to store top dimensions and sort by their aggregated values/hits and
|
// Create priority queue to store top dimensions and sort by their aggregated values/hits and
|
||||||
// string values.
|
// string values.
|
||||||
|
@ -378,7 +379,7 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
|
||||||
Map<String, TopChildrenForPath> intermediateResults = null;
|
Map<String, TopChildrenForPath> intermediateResults = null;
|
||||||
|
|
||||||
// iterate over children and siblings ordinals for all dims
|
// iterate over children and siblings ordinals for all dims
|
||||||
int ord = children[TaxonomyReader.ROOT_ORDINAL];
|
int ord = children.get(TaxonomyReader.ROOT_ORDINAL);
|
||||||
while (ord != TaxonomyReader.INVALID_ORDINAL) {
|
while (ord != TaxonomyReader.INVALID_ORDINAL) {
|
||||||
String dim = taxoReader.getPath(ord).components[0];
|
String dim = taxoReader.getPath(ord).components[0];
|
||||||
FacetsConfig.DimConfig dimConfig = config.getDimConfig(dim);
|
FacetsConfig.DimConfig dimConfig = config.getDimConfig(dim);
|
||||||
|
@ -425,7 +426,7 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ord = siblings[ord];
|
ord = siblings.get(ord);
|
||||||
}
|
}
|
||||||
|
|
||||||
FacetResult[] results = new FacetResult[pq.size()];
|
FacetResult[] results = new FacetResult[pq.size()];
|
||||||
|
|
|
@ -35,6 +35,26 @@ package org.apache.lucene.facet.taxonomy;
|
||||||
* @lucene.experimental
|
* @lucene.experimental
|
||||||
*/
|
*/
|
||||||
public abstract class ParallelTaxonomyArrays {
|
public abstract class ParallelTaxonomyArrays {
|
||||||
|
/** Abstraction that looks like an int[], but read-only. */
|
||||||
|
public abstract static class IntArray {
|
||||||
|
/** Sole constructor. */
|
||||||
|
public IntArray() {}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Equivalent to array[i].
|
||||||
|
*
|
||||||
|
* @param i the index of the value to retrieve
|
||||||
|
* @return the value at position i
|
||||||
|
*/
|
||||||
|
public abstract int get(int i);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Equivalent to array.length.
|
||||||
|
*
|
||||||
|
* @return the allocated size of the array
|
||||||
|
*/
|
||||||
|
public abstract int length();
|
||||||
|
}
|
||||||
|
|
||||||
/** Sole constructor. */
|
/** Sole constructor. */
|
||||||
public ParallelTaxonomyArrays() {}
|
public ParallelTaxonomyArrays() {}
|
||||||
|
@ -43,17 +63,17 @@ public abstract class ParallelTaxonomyArrays {
|
||||||
* Returns the parents array, where {@code parents[i]} denotes the parent of category ordinal
|
* Returns the parents array, where {@code parents[i]} denotes the parent of category ordinal
|
||||||
* {@code i}.
|
* {@code i}.
|
||||||
*/
|
*/
|
||||||
public abstract int[] parents();
|
public abstract IntArray parents();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the children array, where {@code children[i]} denotes a child of category ordinal
|
* Returns the children array, where {@code children[i]} denotes a child of category ordinal
|
||||||
* {@code i}.
|
* {@code i}.
|
||||||
*/
|
*/
|
||||||
public abstract int[] children();
|
public abstract IntArray children();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the siblings array, where {@code siblings[i]} denotes the sibling of category ordinal
|
* Returns the siblings array, where {@code siblings[i]} denotes the sibling of category ordinal
|
||||||
* {@code i}.
|
* {@code i}.
|
||||||
*/
|
*/
|
||||||
public abstract int[] siblings();
|
public abstract IntArray siblings();
|
||||||
}
|
}
|
||||||
|
|
|
@ -84,7 +84,7 @@ public class TaxonomyFacetLabels {
|
||||||
private int currentDocOrdinalCount;
|
private int currentDocOrdinalCount;
|
||||||
|
|
||||||
// Lazily set when nextFacetLabel(int docId, String facetDimension) is first called
|
// Lazily set when nextFacetLabel(int docId, String facetDimension) is first called
|
||||||
private int[] parents;
|
private ParallelTaxonomyArrays.IntArray parents;
|
||||||
|
|
||||||
/** Construct from a specified {@link SortedNumericDocValues} field. */
|
/** Construct from a specified {@link SortedNumericDocValues} field. */
|
||||||
public FacetLabelReader(SortedNumericDocValues ordinalValues) {
|
public FacetLabelReader(SortedNumericDocValues ordinalValues) {
|
||||||
|
@ -141,10 +141,10 @@ public class TaxonomyFacetLabels {
|
||||||
|
|
||||||
private boolean isDescendant(int ord, int ancestorOrd) {
|
private boolean isDescendant(int ord, int ancestorOrd) {
|
||||||
while (ord != INVALID_ORDINAL && ord != ROOT_ORDINAL) {
|
while (ord != INVALID_ORDINAL && ord != ROOT_ORDINAL) {
|
||||||
if (parents[ord] == ancestorOrd) {
|
if (parents.get(ord) == ancestorOrd) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
ord = parents[ord];
|
ord = parents.get(ord);
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
|
@ -59,13 +59,13 @@ abstract class TaxonomyFacets extends Facets {
|
||||||
final FacetsCollector fc;
|
final FacetsCollector fc;
|
||||||
|
|
||||||
/** Maps parent ordinal to its child, or -1 if the parent is childless. */
|
/** Maps parent ordinal to its child, or -1 if the parent is childless. */
|
||||||
private int[] children;
|
private ParallelTaxonomyArrays.IntArray children;
|
||||||
|
|
||||||
/** Maps an ordinal to its sibling, or -1 if there is no sibling. */
|
/** Maps an ordinal to its sibling, or -1 if there is no sibling. */
|
||||||
private int[] siblings;
|
private ParallelTaxonomyArrays.IntArray siblings;
|
||||||
|
|
||||||
/** Maps an ordinal to its parent, or -1 if there is no parent (root node). */
|
/** Maps an ordinal to its parent, or -1 if there is no parent (root node). */
|
||||||
final int[] parents;
|
final ParallelTaxonomyArrays.IntArray parents;
|
||||||
|
|
||||||
/** Sole constructor. */
|
/** Sole constructor. */
|
||||||
TaxonomyFacets(
|
TaxonomyFacets(
|
||||||
|
@ -82,7 +82,7 @@ abstract class TaxonomyFacets extends Facets {
|
||||||
* Returns int[] mapping each ordinal to its first child; this is a large array and is computed
|
* Returns int[] mapping each ordinal to its first child; this is a large array and is computed
|
||||||
* (and then saved) the first time this method is invoked.
|
* (and then saved) the first time this method is invoked.
|
||||||
*/
|
*/
|
||||||
int[] getChildren() throws IOException {
|
ParallelTaxonomyArrays.IntArray getChildren() throws IOException {
|
||||||
if (children == null) {
|
if (children == null) {
|
||||||
children = taxoReader.getParallelTaxonomyArrays().children();
|
children = taxoReader.getParallelTaxonomyArrays().children();
|
||||||
}
|
}
|
||||||
|
@ -93,7 +93,7 @@ abstract class TaxonomyFacets extends Facets {
|
||||||
* Returns int[] mapping each ordinal to its next sibling; this is a large array and is computed
|
* Returns int[] mapping each ordinal to its next sibling; this is a large array and is computed
|
||||||
* (and then saved) the first time this method is invoked.
|
* (and then saved) the first time this method is invoked.
|
||||||
*/
|
*/
|
||||||
int[] getSiblings() throws IOException {
|
ParallelTaxonomyArrays.IntArray getSiblings() throws IOException {
|
||||||
if (siblings == null) {
|
if (siblings == null) {
|
||||||
siblings = taxoReader.getParallelTaxonomyArrays().siblings();
|
siblings = taxoReader.getParallelTaxonomyArrays().siblings();
|
||||||
}
|
}
|
||||||
|
@ -150,9 +150,9 @@ abstract class TaxonomyFacets extends Facets {
|
||||||
return Collections.emptyList();
|
return Collections.emptyList();
|
||||||
}
|
}
|
||||||
|
|
||||||
int[] children = getChildren();
|
ParallelTaxonomyArrays.IntArray children = getChildren();
|
||||||
int[] siblings = getSiblings();
|
ParallelTaxonomyArrays.IntArray siblings = getSiblings();
|
||||||
int ord = children[TaxonomyReader.ROOT_ORDINAL];
|
int ord = children.get(TaxonomyReader.ROOT_ORDINAL);
|
||||||
List<FacetResult> results = new ArrayList<>();
|
List<FacetResult> results = new ArrayList<>();
|
||||||
while (ord != TaxonomyReader.INVALID_ORDINAL) {
|
while (ord != TaxonomyReader.INVALID_ORDINAL) {
|
||||||
String dim = taxoReader.getPath(ord).components[0];
|
String dim = taxoReader.getPath(ord).components[0];
|
||||||
|
@ -163,7 +163,7 @@ abstract class TaxonomyFacets extends Facets {
|
||||||
results.add(result);
|
results.add(result);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ord = siblings[ord];
|
ord = siblings.get(ord);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Sort by highest value, tie break by dim:
|
// Sort by highest value, tie break by dim:
|
||||||
|
|
|
@ -61,10 +61,10 @@ public abstract class TaxonomyReader implements Closeable {
|
||||||
/** An iterator over a category's children. */
|
/** An iterator over a category's children. */
|
||||||
public static class ChildrenIterator {
|
public static class ChildrenIterator {
|
||||||
|
|
||||||
private final int[] siblings;
|
private final ParallelTaxonomyArrays.IntArray siblings;
|
||||||
private int child;
|
private int child;
|
||||||
|
|
||||||
ChildrenIterator(int child, int[] siblings) {
|
ChildrenIterator(int child, ParallelTaxonomyArrays.IntArray siblings) {
|
||||||
this.siblings = siblings;
|
this.siblings = siblings;
|
||||||
this.child = child;
|
this.child = child;
|
||||||
}
|
}
|
||||||
|
@ -75,7 +75,7 @@ public abstract class TaxonomyReader implements Closeable {
|
||||||
public int next() {
|
public int next() {
|
||||||
int res = child;
|
int res = child;
|
||||||
if (child != TaxonomyReader.INVALID_ORDINAL) {
|
if (child != TaxonomyReader.INVALID_ORDINAL) {
|
||||||
child = siblings[child];
|
child = siblings.get(child);
|
||||||
}
|
}
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
@ -181,7 +181,7 @@ public abstract class TaxonomyReader implements Closeable {
|
||||||
/** Returns an iterator over the children of the given ordinal. */
|
/** Returns an iterator over the children of the given ordinal. */
|
||||||
public ChildrenIterator getChildren(final int ordinal) throws IOException {
|
public ChildrenIterator getChildren(final int ordinal) throws IOException {
|
||||||
ParallelTaxonomyArrays arrays = getParallelTaxonomyArrays();
|
ParallelTaxonomyArrays arrays = getParallelTaxonomyArrays();
|
||||||
int child = ordinal >= 0 ? arrays.children()[ordinal] : INVALID_ORDINAL;
|
int child = ordinal >= 0 ? arrays.children().get(ordinal) : INVALID_ORDINAL;
|
||||||
return new ChildrenIterator(child, arrays.siblings());
|
return new ChildrenIterator(child, arrays.siblings());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -34,6 +34,7 @@ import org.apache.lucene.document.NumericDocValuesField;
|
||||||
import org.apache.lucene.document.StringField;
|
import org.apache.lucene.document.StringField;
|
||||||
import org.apache.lucene.facet.FacetsConfig;
|
import org.apache.lucene.facet.FacetsConfig;
|
||||||
import org.apache.lucene.facet.taxonomy.FacetLabel;
|
import org.apache.lucene.facet.taxonomy.FacetLabel;
|
||||||
|
import org.apache.lucene.facet.taxonomy.ParallelTaxonomyArrays;
|
||||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||||
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
|
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
|
||||||
import org.apache.lucene.facet.taxonomy.writercache.LruTaxonomyWriterCache;
|
import org.apache.lucene.facet.taxonomy.writercache.LruTaxonomyWriterCache;
|
||||||
|
@ -678,10 +679,10 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
||||||
// was allocated bigger than it really needs to be.
|
// was allocated bigger than it really needs to be.
|
||||||
Objects.checkIndex(ordinal, nextID.get());
|
Objects.checkIndex(ordinal, nextID.get());
|
||||||
|
|
||||||
int[] parents = getTaxoArrays().parents();
|
ParallelTaxonomyArrays.IntArray parents = getTaxoArrays().parents();
|
||||||
assert ordinal < parents.length
|
assert ordinal < parents.length()
|
||||||
: "requested ordinal (" + ordinal + "); parents.length (" + parents.length + ") !";
|
: "requested ordinal (" + ordinal + "); parents.length (" + parents.length() + ") !";
|
||||||
return parents[ordinal];
|
return parents.get(ordinal);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -38,27 +38,53 @@ import org.apache.lucene.util.RamUsageEstimator;
|
||||||
* @lucene.experimental
|
* @lucene.experimental
|
||||||
*/
|
*/
|
||||||
class TaxonomyIndexArrays extends ParallelTaxonomyArrays implements Accountable {
|
class TaxonomyIndexArrays extends ParallelTaxonomyArrays implements Accountable {
|
||||||
|
private static final int CHUNK_SIZE_BITS = 13;
|
||||||
|
static final int CHUNK_SIZE = 1 << CHUNK_SIZE_BITS;
|
||||||
|
private static final int CHUNK_MASK = CHUNK_SIZE - 1;
|
||||||
|
|
||||||
private final int[] parents;
|
private final ChunkedIntArray parents;
|
||||||
|
|
||||||
// the following two arrays are lazily initialized. note that we only keep a
|
// the following two arrays are lazily initialized. note that we only keep a
|
||||||
// single boolean member as volatile, instead of declaring the arrays
|
// single boolean member as volatile, instead of declaring the arrays
|
||||||
// volatile. the code guarantees that only after the boolean is set to true,
|
// volatile. the code guarantees that only after the boolean is set to true,
|
||||||
// the arrays are returned.
|
// the arrays are returned.
|
||||||
private volatile boolean initializedChildren = false;
|
private volatile boolean initializedChildren = false;
|
||||||
private int[] children, siblings;
|
private ChunkedIntArray children, siblings;
|
||||||
|
|
||||||
/** Used by {@link #add(int, int)} after the array grew. */
|
static class ChunkedIntArray extends ParallelTaxonomyArrays.IntArray {
|
||||||
private TaxonomyIndexArrays(int[] parents) {
|
final int[][] values;
|
||||||
this.parents = parents;
|
|
||||||
|
private ChunkedIntArray(int[][] values) {
|
||||||
|
this.values = values;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int get(int i) {
|
||||||
|
return values[i >> CHUNK_SIZE_BITS][i & CHUNK_MASK];
|
||||||
|
}
|
||||||
|
|
||||||
|
public void set(int i, int val) {
|
||||||
|
values[i >> CHUNK_SIZE_BITS][i & CHUNK_MASK] = val;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int length() {
|
||||||
|
return ((values.length - 1) << CHUNK_SIZE_BITS) + values[values.length - 1].length;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Used by {@link #add(int, int)} after the array grew. Also used for testing. */
|
||||||
|
TaxonomyIndexArrays(int[][] parents) {
|
||||||
|
this.parents = new ChunkedIntArray(parents);
|
||||||
}
|
}
|
||||||
|
|
||||||
public TaxonomyIndexArrays(IndexReader reader) throws IOException {
|
public TaxonomyIndexArrays(IndexReader reader) throws IOException {
|
||||||
parents = new int[reader.maxDoc()];
|
int[][] parentArray = allocateChunkedArray(reader.maxDoc(), 0);
|
||||||
if (parents.length > 0) {
|
if (parentArray.length > 0) {
|
||||||
initParents(reader, 0);
|
initParents(parentArray, reader, 0);
|
||||||
parents[0] = TaxonomyReader.INVALID_ORDINAL;
|
parentArray[0][0] = TaxonomyReader.INVALID_ORDINAL;
|
||||||
}
|
}
|
||||||
|
parents = new ChunkedIntArray(parentArray);
|
||||||
}
|
}
|
||||||
|
|
||||||
public TaxonomyIndexArrays(IndexReader reader, TaxonomyIndexArrays copyFrom) throws IOException {
|
public TaxonomyIndexArrays(IndexReader reader, TaxonomyIndexArrays copyFrom) throws IOException {
|
||||||
|
@ -68,25 +94,54 @@ class TaxonomyIndexArrays extends ParallelTaxonomyArrays implements Accountable
|
||||||
// it may be caused if e.g. the taxonomy segments were merged, and so an updated
|
// it may be caused if e.g. the taxonomy segments were merged, and so an updated
|
||||||
// NRT reader was obtained, even though nothing was changed. this is not very likely
|
// NRT reader was obtained, even though nothing was changed. this is not very likely
|
||||||
// to happen.
|
// to happen.
|
||||||
int[] copyParents = copyFrom.parents();
|
int[][] parentArray = allocateChunkedArray(reader.maxDoc(), copyFrom.parents.values.length - 1);
|
||||||
this.parents = new int[reader.maxDoc()];
|
if (parentArray.length > 0) {
|
||||||
System.arraycopy(copyParents, 0, parents, 0, copyParents.length);
|
copyChunkedArray(copyFrom.parents.values, parentArray);
|
||||||
initParents(reader, copyParents.length);
|
initParents(parentArray, reader, copyFrom.parents.length());
|
||||||
|
}
|
||||||
|
parents = new ChunkedIntArray(parentArray);
|
||||||
if (copyFrom.initializedChildren) {
|
if (copyFrom.initializedChildren) {
|
||||||
initChildrenSiblings(copyFrom);
|
initChildrenSiblings(copyFrom);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static int[][] allocateChunkedArray(int size, int startFrom) {
|
||||||
|
int chunkCount = (size >> CHUNK_SIZE_BITS) + 1;
|
||||||
|
int[][] array = new int[chunkCount][];
|
||||||
|
for (int i = startFrom; i < chunkCount - 1; i++) {
|
||||||
|
array[i] = new int[CHUNK_SIZE];
|
||||||
|
}
|
||||||
|
array[chunkCount - 1] = new int[size & CHUNK_MASK];
|
||||||
|
return array;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void copyChunkedArray(int[][] oldArray, int[][] newArray) {
|
||||||
|
// Copy all but the last (maybe partial) chunk from the old array
|
||||||
|
if (oldArray.length > 1) {
|
||||||
|
System.arraycopy(oldArray, 0, newArray, 0, oldArray.length - 1);
|
||||||
|
}
|
||||||
|
int[] lastCopyChunk = oldArray[oldArray.length - 1];
|
||||||
|
System.arraycopy(lastCopyChunk, 0, newArray[oldArray.length - 1], 0, lastCopyChunk.length);
|
||||||
|
}
|
||||||
|
|
||||||
private synchronized void initChildrenSiblings(TaxonomyIndexArrays copyFrom) {
|
private synchronized void initChildrenSiblings(TaxonomyIndexArrays copyFrom) {
|
||||||
if (!initializedChildren) { // must do this check !
|
if (!initializedChildren) { // must do this check !
|
||||||
children = new int[parents.length];
|
int startFrom;
|
||||||
siblings = new int[parents.length];
|
if (copyFrom == null) {
|
||||||
|
startFrom = 0;
|
||||||
|
} else {
|
||||||
|
startFrom = copyFrom.parents.values.length - 1;
|
||||||
|
}
|
||||||
|
int[][] childrenArray = allocateChunkedArray(parents.length(), startFrom);
|
||||||
|
int[][] siblingsArray = allocateChunkedArray(parents.length(), startFrom);
|
||||||
|
// Rely on these arrays being copied by reference, since we may modify them below
|
||||||
|
children = new ChunkedIntArray(childrenArray);
|
||||||
|
siblings = new ChunkedIntArray(siblingsArray);
|
||||||
if (copyFrom != null) {
|
if (copyFrom != null) {
|
||||||
// called from the ctor, after we know copyFrom has initialized children/siblings
|
// called from the ctor, after we know copyFrom has initialized children/siblings
|
||||||
System.arraycopy(copyFrom.children(), 0, children, 0, copyFrom.children().length);
|
copyChunkedArray(copyFrom.children.values, childrenArray);
|
||||||
System.arraycopy(copyFrom.siblings(), 0, siblings, 0, copyFrom.siblings().length);
|
copyChunkedArray(copyFrom.siblings.values, siblingsArray);
|
||||||
computeChildrenSiblings(copyFrom.parents.length);
|
computeChildrenSiblings(copyFrom.parents.length());
|
||||||
} else {
|
} else {
|
||||||
computeChildrenSiblings(0);
|
computeChildrenSiblings(0);
|
||||||
}
|
}
|
||||||
|
@ -98,26 +153,31 @@ class TaxonomyIndexArrays extends ParallelTaxonomyArrays implements Accountable
|
||||||
// reset the youngest child of all ordinals. while this should be done only
|
// reset the youngest child of all ordinals. while this should be done only
|
||||||
// for the leaves, we don't know up front which are the leaves, so we reset
|
// for the leaves, we don't know up front which are the leaves, so we reset
|
||||||
// all of them.
|
// all of them.
|
||||||
for (int i = first; i < parents.length; i++) {
|
int length = parents.length();
|
||||||
children[i] = TaxonomyReader.INVALID_ORDINAL;
|
for (int i = first; i < length; i++) {
|
||||||
|
children.set(i, TaxonomyReader.INVALID_ORDINAL);
|
||||||
}
|
}
|
||||||
|
|
||||||
// the root category has no parent, and therefore no siblings
|
// the root category has no parent, and therefore no siblings
|
||||||
if (first == 0) {
|
if (first == 0) {
|
||||||
first = 1;
|
first = 1;
|
||||||
siblings[0] = TaxonomyReader.INVALID_ORDINAL;
|
siblings.set(0, TaxonomyReader.INVALID_ORDINAL);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int i = first; i < parents.length; i++) {
|
for (int i = first; i < length; i++) {
|
||||||
|
int parent = parents.get(i);
|
||||||
|
// The existing youngest child of the parent is the next older sibling of i.
|
||||||
// note that parents[i] is always < i, so the right-hand-side of
|
// note that parents[i] is always < i, so the right-hand-side of
|
||||||
// the following line is already set when we get here
|
// the following line is already set when we get here
|
||||||
siblings[i] = children[parents[i]];
|
siblings.set(i, children.get(parent));
|
||||||
children[parents[i]] = i;
|
// The new youngest child of the parent is i.
|
||||||
|
children.set(parent, i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Read the parents of the new categories
|
// Read the parents of the new categories
|
||||||
private void initParents(IndexReader reader, int first) throws IOException {
|
private static void initParents(int[][] parentsArray, IndexReader reader, int first)
|
||||||
|
throws IOException {
|
||||||
if (reader.maxDoc() == first) {
|
if (reader.maxDoc() == first) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -141,7 +201,9 @@ class TaxonomyIndexArrays extends ParallelTaxonomyArrays implements Accountable
|
||||||
throw new CorruptIndexException(
|
throw new CorruptIndexException(
|
||||||
"Missing parent data for category " + (doc + leafContext.docBase), reader.toString());
|
"Missing parent data for category " + (doc + leafContext.docBase), reader.toString());
|
||||||
}
|
}
|
||||||
parents[doc + leafContext.docBase] = Math.toIntExact(parentValues.longValue());
|
int pos = doc + leafContext.docBase;
|
||||||
|
parentsArray[pos >> CHUNK_SIZE_BITS][pos & CHUNK_MASK] =
|
||||||
|
Math.toIntExact(parentValues.longValue());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -153,12 +215,15 @@ class TaxonomyIndexArrays extends ParallelTaxonomyArrays implements Accountable
|
||||||
* <p><b>NOTE:</b> you should call this method from a thread-safe code.
|
* <p><b>NOTE:</b> you should call this method from a thread-safe code.
|
||||||
*/
|
*/
|
||||||
TaxonomyIndexArrays add(int ordinal, int parentOrdinal) {
|
TaxonomyIndexArrays add(int ordinal, int parentOrdinal) {
|
||||||
if (ordinal >= parents.length) {
|
if (ordinal >= parents.length()) {
|
||||||
int[] newarray = ArrayUtil.grow(parents, ordinal + 1);
|
int[][] newParents =
|
||||||
newarray[ordinal] = parentOrdinal;
|
allocateChunkedArray(
|
||||||
return new TaxonomyIndexArrays(newarray);
|
ArrayUtil.oversize(ordinal + 1, Integer.BYTES), parents.values.length - 1);
|
||||||
|
copyChunkedArray(parents.values, newParents);
|
||||||
|
newParents[ordinal >> CHUNK_SIZE_BITS][ordinal & CHUNK_MASK] = parentOrdinal;
|
||||||
|
return new TaxonomyIndexArrays(newParents);
|
||||||
}
|
}
|
||||||
parents[ordinal] = parentOrdinal;
|
parents.set(ordinal, parentOrdinal);
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -167,7 +232,7 @@ class TaxonomyIndexArrays extends ParallelTaxonomyArrays implements Accountable
|
||||||
* {@code i}.
|
* {@code i}.
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public int[] parents() {
|
public ChunkedIntArray parents() {
|
||||||
return parents;
|
return parents;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -177,7 +242,7 @@ class TaxonomyIndexArrays extends ParallelTaxonomyArrays implements Accountable
|
||||||
* taxonomy as an immediate child of {@code i}.
|
* taxonomy as an immediate child of {@code i}.
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public int[] children() {
|
public ChunkedIntArray children() {
|
||||||
if (!initializedChildren) {
|
if (!initializedChildren) {
|
||||||
initChildrenSiblings(null);
|
initChildrenSiblings(null);
|
||||||
}
|
}
|
||||||
|
@ -191,7 +256,7 @@ class TaxonomyIndexArrays extends ParallelTaxonomyArrays implements Accountable
|
||||||
* {@code i}. The sibling is defined as the previous youngest child of {@code parents[i]}.
|
* {@code i}. The sibling is defined as the previous youngest child of {@code parents[i]}.
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public int[] siblings() {
|
public ChunkedIntArray siblings() {
|
||||||
if (!initializedChildren) {
|
if (!initializedChildren) {
|
||||||
initChildrenSiblings(null);
|
initChildrenSiblings(null);
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,7 +20,8 @@ import java.io.IOException;
|
||||||
import java.io.PrintWriter;
|
import java.io.PrintWriter;
|
||||||
import java.io.StringWriter;
|
import java.io.StringWriter;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.List;
|
||||||
|
import java.util.Locale;
|
||||||
import java.util.concurrent.atomic.AtomicBoolean;
|
import java.util.concurrent.atomic.AtomicBoolean;
|
||||||
import org.apache.lucene.facet.FacetTestCase;
|
import org.apache.lucene.facet.FacetTestCase;
|
||||||
import org.apache.lucene.facet.SlowDirectory;
|
import org.apache.lucene.facet.SlowDirectory;
|
||||||
|
@ -304,7 +305,7 @@ public class TestTaxonomyCombined extends FacetTestCase {
|
||||||
TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
|
TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
|
||||||
assertEquals(1, tr.getSize());
|
assertEquals(1, tr.getSize());
|
||||||
assertEquals(0, tr.getPath(0).length);
|
assertEquals(0, tr.getPath(0).length);
|
||||||
assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getParallelTaxonomyArrays().parents()[0]);
|
assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getParallelTaxonomyArrays().parents().get(0));
|
||||||
assertEquals(0, tr.getOrdinal(new FacetLabel()));
|
assertEquals(0, tr.getOrdinal(new FacetLabel()));
|
||||||
tr.close();
|
tr.close();
|
||||||
indexDir.close();
|
indexDir.close();
|
||||||
|
@ -323,7 +324,7 @@ public class TestTaxonomyCombined extends FacetTestCase {
|
||||||
TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
|
TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
|
||||||
assertEquals(1, tr.getSize());
|
assertEquals(1, tr.getSize());
|
||||||
assertEquals(0, tr.getPath(0).length);
|
assertEquals(0, tr.getPath(0).length);
|
||||||
assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getParallelTaxonomyArrays().parents()[0]);
|
assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getParallelTaxonomyArrays().parents().get(0));
|
||||||
assertEquals(0, tr.getOrdinal(new FacetLabel()));
|
assertEquals(0, tr.getOrdinal(new FacetLabel()));
|
||||||
tw.close();
|
tw.close();
|
||||||
tr.close();
|
tr.close();
|
||||||
|
@ -412,13 +413,13 @@ public class TestTaxonomyCombined extends FacetTestCase {
|
||||||
TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
|
TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
|
||||||
|
|
||||||
// check that the parent of the root ordinal is the invalid ordinal:
|
// check that the parent of the root ordinal is the invalid ordinal:
|
||||||
int[] parents = tr.getParallelTaxonomyArrays().parents();
|
ParallelTaxonomyArrays.IntArray parents = tr.getParallelTaxonomyArrays().parents();
|
||||||
assertEquals(TaxonomyReader.INVALID_ORDINAL, parents[0]);
|
assertEquals(TaxonomyReader.INVALID_ORDINAL, parents.get(0));
|
||||||
|
|
||||||
// check parent of non-root ordinals:
|
// check parent of non-root ordinals:
|
||||||
for (int ordinal = 1; ordinal < tr.getSize(); ordinal++) {
|
for (int ordinal = 1; ordinal < tr.getSize(); ordinal++) {
|
||||||
FacetLabel me = tr.getPath(ordinal);
|
FacetLabel me = tr.getPath(ordinal);
|
||||||
int parentOrdinal = parents[ordinal];
|
int parentOrdinal = parents.get(ordinal);
|
||||||
FacetLabel parent = tr.getPath(parentOrdinal);
|
FacetLabel parent = tr.getPath(parentOrdinal);
|
||||||
if (parent == null) {
|
if (parent == null) {
|
||||||
fail(
|
fail(
|
||||||
|
@ -552,10 +553,10 @@ public class TestTaxonomyCombined extends FacetTestCase {
|
||||||
tw.close();
|
tw.close();
|
||||||
TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
|
TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
|
||||||
ParallelTaxonomyArrays ca = tr.getParallelTaxonomyArrays();
|
ParallelTaxonomyArrays ca = tr.getParallelTaxonomyArrays();
|
||||||
int[] youngestChildArray = ca.children();
|
ParallelTaxonomyArrays.IntArray youngestChildArray = ca.children();
|
||||||
assertEquals(tr.getSize(), youngestChildArray.length);
|
assertEquals(tr.getSize(), youngestChildArray.length());
|
||||||
int[] olderSiblingArray = ca.siblings();
|
ParallelTaxonomyArrays.IntArray olderSiblingArray = ca.siblings();
|
||||||
assertEquals(tr.getSize(), olderSiblingArray.length);
|
assertEquals(tr.getSize(), olderSiblingArray.length());
|
||||||
for (int i = 0; i < expectedCategories.length; i++) {
|
for (int i = 0; i < expectedCategories.length; i++) {
|
||||||
// find expected children by looking at all expectedCategories
|
// find expected children by looking at all expectedCategories
|
||||||
// for children
|
// for children
|
||||||
|
@ -578,12 +579,12 @@ public class TestTaxonomyCombined extends FacetTestCase {
|
||||||
// check that children and expectedChildren are the same, with the
|
// check that children and expectedChildren are the same, with the
|
||||||
// correct reverse (youngest to oldest) order:
|
// correct reverse (youngest to oldest) order:
|
||||||
if (expectedChildren.size() == 0) {
|
if (expectedChildren.size() == 0) {
|
||||||
assertEquals(TaxonomyReader.INVALID_ORDINAL, youngestChildArray[i]);
|
assertEquals(TaxonomyReader.INVALID_ORDINAL, youngestChildArray.get(i));
|
||||||
} else {
|
} else {
|
||||||
int child = youngestChildArray[i];
|
int child = youngestChildArray.get(i);
|
||||||
assertEquals(expectedChildren.get(0).intValue(), child);
|
assertEquals(expectedChildren.get(0).intValue(), child);
|
||||||
for (int j = 1; j < expectedChildren.size(); j++) {
|
for (int j = 1; j < expectedChildren.size(); j++) {
|
||||||
child = olderSiblingArray[child];
|
child = olderSiblingArray.get(child);
|
||||||
assertEquals(expectedChildren.get(j).intValue(), child);
|
assertEquals(expectedChildren.get(j).intValue(), child);
|
||||||
// if child is INVALID_ORDINAL we should stop, but
|
// if child is INVALID_ORDINAL we should stop, but
|
||||||
// assertEquals would fail in this case anyway.
|
// assertEquals would fail in this case anyway.
|
||||||
|
@ -591,7 +592,7 @@ public class TestTaxonomyCombined extends FacetTestCase {
|
||||||
// When we're done comparing, olderSiblingArray should now point
|
// When we're done comparing, olderSiblingArray should now point
|
||||||
// to INVALID_ORDINAL, saying there are no more children. If it
|
// to INVALID_ORDINAL, saying there are no more children. If it
|
||||||
// doesn't, we found too many children...
|
// doesn't, we found too many children...
|
||||||
assertEquals(-1, olderSiblingArray[child]);
|
assertEquals(-1, olderSiblingArray.get(child));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
tr.close();
|
tr.close();
|
||||||
|
@ -613,34 +614,34 @@ public class TestTaxonomyCombined extends FacetTestCase {
|
||||||
tw.close();
|
tw.close();
|
||||||
TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
|
TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
|
||||||
ParallelTaxonomyArrays ca = tr.getParallelTaxonomyArrays();
|
ParallelTaxonomyArrays ca = tr.getParallelTaxonomyArrays();
|
||||||
int[] children = ca.children();
|
ParallelTaxonomyArrays.IntArray children = ca.children();
|
||||||
assertEquals(tr.getSize(), children.length);
|
assertEquals(tr.getSize(), children.length());
|
||||||
int[] olderSiblingArray = ca.siblings();
|
ParallelTaxonomyArrays.IntArray olderSiblingArray = ca.siblings();
|
||||||
assertEquals(tr.getSize(), olderSiblingArray.length);
|
assertEquals(tr.getSize(), olderSiblingArray.length());
|
||||||
|
|
||||||
// test that the "youngest child" of every category is indeed a child:
|
// test that the "youngest child" of every category is indeed a child:
|
||||||
int[] parents = tr.getParallelTaxonomyArrays().parents();
|
ParallelTaxonomyArrays.IntArray parents = tr.getParallelTaxonomyArrays().parents();
|
||||||
for (int i = 0; i < tr.getSize(); i++) {
|
for (int i = 0; i < tr.getSize(); i++) {
|
||||||
int youngestChild = children[i];
|
int youngestChild = children.get(i);
|
||||||
if (youngestChild != TaxonomyReader.INVALID_ORDINAL) {
|
if (youngestChild != TaxonomyReader.INVALID_ORDINAL) {
|
||||||
assertEquals(i, parents[youngestChild]);
|
assertEquals(i, parents.get(youngestChild));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// test that the "older sibling" of every category is indeed older (lower)
|
// test that the "older sibling" of every category is indeed older (lower)
|
||||||
// (it can also be INVALID_ORDINAL, which is lower than any ordinal)
|
// (it can also be INVALID_ORDINAL, which is lower than any ordinal)
|
||||||
for (int i = 0; i < tr.getSize(); i++) {
|
for (int i = 0; i < tr.getSize(); i++) {
|
||||||
assertTrue("olderSiblingArray[" + i + "] should be <" + i, olderSiblingArray[i] < i);
|
assertTrue("olderSiblingArray[" + i + "] should be <" + i, olderSiblingArray.get(i) < i);
|
||||||
}
|
}
|
||||||
|
|
||||||
// test that the "older sibling" of every category is indeed a sibling
|
// test that the "older sibling" of every category is indeed a sibling
|
||||||
// (they share the same parent)
|
// (they share the same parent)
|
||||||
for (int i = 0; i < tr.getSize(); i++) {
|
for (int i = 0; i < tr.getSize(); i++) {
|
||||||
int sibling = olderSiblingArray[i];
|
int sibling = olderSiblingArray.get(i);
|
||||||
if (sibling == TaxonomyReader.INVALID_ORDINAL) {
|
if (sibling == TaxonomyReader.INVALID_ORDINAL) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
assertEquals(parents[i], parents[sibling]);
|
assertEquals(parents.get(i), parents.get(sibling));
|
||||||
}
|
}
|
||||||
|
|
||||||
// And now for slightly more complex (and less "invariant-like"...)
|
// And now for slightly more complex (and less "invariant-like"...)
|
||||||
|
@ -652,14 +653,14 @@ public class TestTaxonomyCombined extends FacetTestCase {
|
||||||
// Find the really youngest child:
|
// Find the really youngest child:
|
||||||
int j;
|
int j;
|
||||||
for (j = tr.getSize() - 1; j > i; j--) {
|
for (j = tr.getSize() - 1; j > i; j--) {
|
||||||
if (parents[j] == i) {
|
if (parents.get(j) == i) {
|
||||||
break; // found youngest child
|
break; // found youngest child
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (j == i) { // no child found
|
if (j == i) { // no child found
|
||||||
j = TaxonomyReader.INVALID_ORDINAL;
|
j = TaxonomyReader.INVALID_ORDINAL;
|
||||||
}
|
}
|
||||||
assertEquals(j, children[i]);
|
assertEquals(j, children.get(i));
|
||||||
}
|
}
|
||||||
|
|
||||||
// test that the "older sibling" is indeed the least oldest one - and
|
// test that the "older sibling" is indeed the least oldest one - and
|
||||||
|
@ -669,20 +670,26 @@ public class TestTaxonomyCombined extends FacetTestCase {
|
||||||
// Find the youngest older sibling:
|
// Find the youngest older sibling:
|
||||||
int j;
|
int j;
|
||||||
for (j = i - 1; j >= 0; j--) {
|
for (j = i - 1; j >= 0; j--) {
|
||||||
if (parents[j] == parents[i]) {
|
if (parents.get(j) == parents.get(i)) {
|
||||||
break; // found youngest older sibling
|
break; // found youngest older sibling
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (j < 0) { // no sibling found
|
if (j < 0) { // no sibling found
|
||||||
j = TaxonomyReader.INVALID_ORDINAL;
|
j = TaxonomyReader.INVALID_ORDINAL;
|
||||||
}
|
}
|
||||||
assertEquals(j, olderSiblingArray[i]);
|
assertEquals(j, olderSiblingArray.get(i));
|
||||||
}
|
}
|
||||||
|
|
||||||
tr.close();
|
tr.close();
|
||||||
indexDir.close();
|
indexDir.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static void assertArrayEquals(int[] expected, ParallelTaxonomyArrays.IntArray actual) {
|
||||||
|
for (int i = 0; i < expected.length; i++) {
|
||||||
|
assertEquals(expected[i], actual.get(i));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/** Test how getChildrenArrays() deals with the taxonomy's growth: */
|
/** Test how getChildrenArrays() deals with the taxonomy's growth: */
|
||||||
@Test
|
@Test
|
||||||
public void testChildrenArraysGrowth() throws Exception {
|
public void testChildrenArraysGrowth() throws Exception {
|
||||||
|
@ -693,10 +700,10 @@ public class TestTaxonomyCombined extends FacetTestCase {
|
||||||
TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
|
TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
|
||||||
ParallelTaxonomyArrays ca = tr.getParallelTaxonomyArrays();
|
ParallelTaxonomyArrays ca = tr.getParallelTaxonomyArrays();
|
||||||
assertEquals(3, tr.getSize());
|
assertEquals(3, tr.getSize());
|
||||||
assertEquals(3, ca.siblings().length);
|
assertEquals(3, ca.siblings().length());
|
||||||
assertEquals(3, ca.children().length);
|
assertEquals(3, ca.children().length());
|
||||||
assertTrue(Arrays.equals(new int[] {1, 2, -1}, ca.children()));
|
assertArrayEquals(new int[] {1, 2, -1}, ca.children());
|
||||||
assertTrue(Arrays.equals(new int[] {-1, -1, -1}, ca.siblings()));
|
assertArrayEquals(new int[] {-1, -1, -1}, ca.siblings());
|
||||||
tw.addCategory(new FacetLabel("hi", "ho"));
|
tw.addCategory(new FacetLabel("hi", "ho"));
|
||||||
tw.addCategory(new FacetLabel("hello"));
|
tw.addCategory(new FacetLabel("hello"));
|
||||||
tw.commit();
|
tw.commit();
|
||||||
|
@ -704,8 +711,8 @@ public class TestTaxonomyCombined extends FacetTestCase {
|
||||||
ParallelTaxonomyArrays newca = tr.getParallelTaxonomyArrays();
|
ParallelTaxonomyArrays newca = tr.getParallelTaxonomyArrays();
|
||||||
assertSame(newca, ca); // we got exactly the same object
|
assertSame(newca, ca); // we got exactly the same object
|
||||||
assertEquals(3, tr.getSize());
|
assertEquals(3, tr.getSize());
|
||||||
assertEquals(3, ca.siblings().length);
|
assertEquals(3, ca.siblings().length());
|
||||||
assertEquals(3, ca.children().length);
|
assertEquals(3, ca.children().length());
|
||||||
// After the refresh, things change:
|
// After the refresh, things change:
|
||||||
TaxonomyReader newtr = TaxonomyReader.openIfChanged(tr);
|
TaxonomyReader newtr = TaxonomyReader.openIfChanged(tr);
|
||||||
assertNotNull(newtr);
|
assertNotNull(newtr);
|
||||||
|
@ -713,10 +720,10 @@ public class TestTaxonomyCombined extends FacetTestCase {
|
||||||
tr = newtr;
|
tr = newtr;
|
||||||
ca = tr.getParallelTaxonomyArrays();
|
ca = tr.getParallelTaxonomyArrays();
|
||||||
assertEquals(5, tr.getSize());
|
assertEquals(5, tr.getSize());
|
||||||
assertEquals(5, ca.siblings().length);
|
assertEquals(5, ca.siblings().length());
|
||||||
assertEquals(5, ca.children().length);
|
assertEquals(5, ca.children().length());
|
||||||
assertTrue(Arrays.equals(new int[] {4, 3, -1, -1, -1}, ca.children()));
|
assertArrayEquals(new int[] {4, 3, -1, -1, -1}, ca.children());
|
||||||
assertTrue(Arrays.equals(new int[] {-1, -1, -1, 2, 1}, ca.siblings()));
|
assertArrayEquals(new int[] {-1, -1, -1, 2, 1}, ca.siblings());
|
||||||
tw.close();
|
tw.close();
|
||||||
tr.close();
|
tr.close();
|
||||||
indexDir.close();
|
indexDir.close();
|
||||||
|
@ -737,7 +744,7 @@ public class TestTaxonomyCombined extends FacetTestCase {
|
||||||
final ParallelTaxonomyArrays ca1 = trBase.getParallelTaxonomyArrays();
|
final ParallelTaxonomyArrays ca1 = trBase.getParallelTaxonomyArrays();
|
||||||
|
|
||||||
final int abOrd = trBase.getOrdinal(abPath);
|
final int abOrd = trBase.getOrdinal(abPath);
|
||||||
final int abYoungChildBase1 = ca1.children()[abOrd];
|
final int abYoungChildBase1 = ca1.children().get(abOrd);
|
||||||
|
|
||||||
final int numCategories = atLeast(200);
|
final int numCategories = atLeast(200);
|
||||||
for (int i = 0; i < numCategories; i++) {
|
for (int i = 0; i < numCategories; i++) {
|
||||||
|
@ -751,7 +758,7 @@ public class TestTaxonomyCombined extends FacetTestCase {
|
||||||
trBase = newTaxoReader;
|
trBase = newTaxoReader;
|
||||||
|
|
||||||
final ParallelTaxonomyArrays ca2 = trBase.getParallelTaxonomyArrays();
|
final ParallelTaxonomyArrays ca2 = trBase.getParallelTaxonomyArrays();
|
||||||
final int abYoungChildBase2 = ca2.children()[abOrd];
|
final int abYoungChildBase2 = ca2.children().get(abOrd);
|
||||||
|
|
||||||
int numRetries = atLeast(10);
|
int numRetries = atLeast(10);
|
||||||
for (int retry = 0; retry < numRetries; retry++) {
|
for (int retry = 0; retry < numRetries; retry++) {
|
||||||
|
@ -799,7 +806,7 @@ public class TestTaxonomyCombined extends FacetTestCase {
|
||||||
setPriority(1 + getPriority());
|
setPriority(1 + getPriority());
|
||||||
try {
|
try {
|
||||||
while (!stop.get()) {
|
while (!stop.get()) {
|
||||||
int lastOrd = tr.getParallelTaxonomyArrays().parents().length - 1;
|
int lastOrd = tr.getParallelTaxonomyArrays().parents().length() - 1;
|
||||||
assertNotNull(
|
assertNotNull(
|
||||||
"path of last-ord " + lastOrd + " is not found!", tr.getPath(lastOrd));
|
"path of last-ord " + lastOrd + " is not found!", tr.getPath(lastOrd));
|
||||||
assertChildrenArrays(tr.getParallelTaxonomyArrays(), retry, retrieval[0]++);
|
assertChildrenArrays(tr.getParallelTaxonomyArrays(), retry, retrieval[0]++);
|
||||||
|
@ -812,7 +819,7 @@ public class TestTaxonomyCombined extends FacetTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
private void assertChildrenArrays(ParallelTaxonomyArrays ca, int retry, int retrieval) {
|
private void assertChildrenArrays(ParallelTaxonomyArrays ca, int retry, int retrieval) {
|
||||||
final int abYoungChild = ca.children()[abOrd];
|
final int abYoungChild = ca.children().get(abOrd);
|
||||||
assertTrue(
|
assertTrue(
|
||||||
"Retry "
|
"Retry "
|
||||||
+ retry
|
+ retry
|
||||||
|
@ -828,7 +835,7 @@ public class TestTaxonomyCombined extends FacetTestCase {
|
||||||
+ abYoungChildBase2
|
+ abYoungChildBase2
|
||||||
+ " but was: "
|
+ " but was: "
|
||||||
+ abYoungChild,
|
+ abYoungChild,
|
||||||
abYoungChildBase1 == abYoungChild || abYoungChildBase2 == ca.children()[abOrd]);
|
abYoungChildBase1 == abYoungChild || abYoungChildBase2 == ca.children().get(abOrd));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
thread.start();
|
thread.start();
|
||||||
|
@ -903,7 +910,8 @@ public class TestTaxonomyCombined extends FacetTestCase {
|
||||||
|
|
||||||
int author = 1;
|
int author = 1;
|
||||||
try {
|
try {
|
||||||
assertEquals(TaxonomyReader.ROOT_ORDINAL, tr.getParallelTaxonomyArrays().parents()[author]);
|
assertEquals(
|
||||||
|
TaxonomyReader.ROOT_ORDINAL, tr.getParallelTaxonomyArrays().parents().get(author));
|
||||||
// ok
|
// ok
|
||||||
} catch (
|
} catch (
|
||||||
@SuppressWarnings("unused")
|
@SuppressWarnings("unused")
|
||||||
|
@ -926,10 +934,10 @@ public class TestTaxonomyCombined extends FacetTestCase {
|
||||||
assertNotNull(newTaxoReader);
|
assertNotNull(newTaxoReader);
|
||||||
tr.close();
|
tr.close();
|
||||||
tr = newTaxoReader;
|
tr = newTaxoReader;
|
||||||
int[] parents = tr.getParallelTaxonomyArrays().parents();
|
ParallelTaxonomyArrays.IntArray parents = tr.getParallelTaxonomyArrays().parents();
|
||||||
assertEquals(author, parents[dawkins]);
|
assertEquals(author, parents.get(dawkins));
|
||||||
assertEquals(TaxonomyReader.ROOT_ORDINAL, parents[author]);
|
assertEquals(TaxonomyReader.ROOT_ORDINAL, parents.get(author));
|
||||||
assertEquals(TaxonomyReader.INVALID_ORDINAL, parents[TaxonomyReader.ROOT_ORDINAL]);
|
assertEquals(TaxonomyReader.INVALID_ORDINAL, parents.get(TaxonomyReader.ROOT_ORDINAL));
|
||||||
assertEquals(3, tr.getSize());
|
assertEquals(3, tr.getSize());
|
||||||
tw.close();
|
tw.close();
|
||||||
tr.close();
|
tr.close();
|
||||||
|
@ -1097,6 +1105,64 @@ public class TestTaxonomyCombined extends FacetTestCase {
|
||||||
dir.close();
|
dir.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static String[][] manyCategories(int count, int roundSize) {
|
||||||
|
String[][] result = new String[count / roundSize + 1][];
|
||||||
|
int k = 0;
|
||||||
|
do {
|
||||||
|
k += roundSize;
|
||||||
|
List<String> round = new ArrayList<>();
|
||||||
|
for (int i = k - roundSize + 1; i <= k && i < count; i++) {
|
||||||
|
round.add(String.format(Locale.ROOT, "category %d of %d", i, k));
|
||||||
|
}
|
||||||
|
result[k / roundSize - 1] = round.toArray(new String[0]);
|
||||||
|
} while (k <= count);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testThousandsOfCategories() throws IOException {
|
||||||
|
int roundSize = random().nextInt(2, 4);
|
||||||
|
int size = random().nextInt(16384, 32768);
|
||||||
|
Directory indexDir = newDirectory();
|
||||||
|
TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
|
||||||
|
String[][] manyCategories = manyCategories(size, roundSize);
|
||||||
|
for (String[] elem : manyCategories) {
|
||||||
|
if (elem == null) {
|
||||||
|
throw new IllegalStateException(
|
||||||
|
"Got null array with size = " + size + " and roundSize = " + roundSize);
|
||||||
|
} else if (elem.length > 0) {
|
||||||
|
tw.addCategory(new FacetLabel(elem));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
tw.close();
|
||||||
|
TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
|
||||||
|
ParallelTaxonomyArrays ca = tr.getParallelTaxonomyArrays();
|
||||||
|
ParallelTaxonomyArrays.IntArray parents = ca.parents();
|
||||||
|
ParallelTaxonomyArrays.IntArray children = ca.children();
|
||||||
|
assertEquals(size, parents.length());
|
||||||
|
assertEquals(size, children.length());
|
||||||
|
for (int j = 1; j < size - roundSize; j += roundSize) {
|
||||||
|
// Top level categories all have root as their parent.
|
||||||
|
assertEquals(0, parents.get(j));
|
||||||
|
for (int i = j; i < j + roundSize - 1; i++) {
|
||||||
|
// Children extend in a chain from the top level category.
|
||||||
|
// The parent/child relationships are symmetric.
|
||||||
|
assertEquals(i + 1, children.get(i));
|
||||||
|
if (i > j) {
|
||||||
|
assertEquals(i - 1, parents.get(i));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ParallelTaxonomyArrays.IntArray siblings = ca.siblings();
|
||||||
|
assertEquals(size, siblings.length());
|
||||||
|
for (int i = 1; i < size - roundSize; i += roundSize) {
|
||||||
|
// Each top-level category (after the first) has the previous top-level category as their
|
||||||
|
// older sibling.
|
||||||
|
assertEquals(i, siblings.get(i + roundSize));
|
||||||
|
}
|
||||||
|
tr.close();
|
||||||
|
indexDir.close();
|
||||||
|
}
|
||||||
|
|
||||||
// TODO (Facet): test multiple readers, one writer. Have the multiple readers
|
// TODO (Facet): test multiple readers, one writer. Have the multiple readers
|
||||||
// using the same object (simulating threads) or different objects
|
// using the same object (simulating threads) or different objects
|
||||||
// (simulating processes).
|
// (simulating processes).
|
||||||
|
|
|
@ -25,6 +25,7 @@ import org.apache.lucene.facet.FacetField;
|
||||||
import org.apache.lucene.facet.FacetTestCase;
|
import org.apache.lucene.facet.FacetTestCase;
|
||||||
import org.apache.lucene.facet.FacetsConfig;
|
import org.apache.lucene.facet.FacetsConfig;
|
||||||
import org.apache.lucene.facet.taxonomy.FacetLabel;
|
import org.apache.lucene.facet.taxonomy.FacetLabel;
|
||||||
|
import org.apache.lucene.facet.taxonomy.ParallelTaxonomyArrays;
|
||||||
import org.apache.lucene.facet.taxonomy.writercache.LruTaxonomyWriterCache;
|
import org.apache.lucene.facet.taxonomy.writercache.LruTaxonomyWriterCache;
|
||||||
import org.apache.lucene.facet.taxonomy.writercache.TaxonomyWriterCache;
|
import org.apache.lucene.facet.taxonomy.writercache.TaxonomyWriterCache;
|
||||||
import org.apache.lucene.index.IndexWriter;
|
import org.apache.lucene.index.IndexWriter;
|
||||||
|
@ -153,7 +154,7 @@ public class TestConcurrentFacetedIndexing extends FacetTestCase {
|
||||||
}
|
}
|
||||||
fail("mismatch number of categories");
|
fail("mismatch number of categories");
|
||||||
}
|
}
|
||||||
int[] parents = tr.getParallelTaxonomyArrays().parents();
|
ParallelTaxonomyArrays.IntArray parents = tr.getParallelTaxonomyArrays().parents();
|
||||||
for (String cat : values.keySet()) {
|
for (String cat : values.keySet()) {
|
||||||
FacetLabel cp = new FacetLabel(FacetsConfig.stringToPath(cat));
|
FacetLabel cp = new FacetLabel(FacetsConfig.stringToPath(cat));
|
||||||
assertTrue("category not found " + cp, tr.getOrdinal(cp) > 0);
|
assertTrue("category not found " + cp, tr.getOrdinal(cp) > 0);
|
||||||
|
@ -163,7 +164,7 @@ public class TestConcurrentFacetedIndexing extends FacetTestCase {
|
||||||
for (int i = 0; i < level; i++) {
|
for (int i = 0; i < level; i++) {
|
||||||
path = cp.subpath(i + 1);
|
path = cp.subpath(i + 1);
|
||||||
int ord = tr.getOrdinal(path);
|
int ord = tr.getOrdinal(path);
|
||||||
assertEquals("invalid parent for cp=" + path, parentOrd, parents[ord]);
|
assertEquals("invalid parent for cp=" + path, parentOrd, parents.get(ord));
|
||||||
parentOrd = ord; // next level should have this parent
|
parentOrd = ord; // next level should have this parent
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -31,6 +31,7 @@ import java.util.Set;
|
||||||
import java.util.stream.IntStream;
|
import java.util.stream.IntStream;
|
||||||
import org.apache.lucene.facet.FacetTestCase;
|
import org.apache.lucene.facet.FacetTestCase;
|
||||||
import org.apache.lucene.facet.taxonomy.FacetLabel;
|
import org.apache.lucene.facet.taxonomy.FacetLabel;
|
||||||
|
import org.apache.lucene.facet.taxonomy.ParallelTaxonomyArrays;
|
||||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader.ChildrenIterator;
|
import org.apache.lucene.facet.taxonomy.TaxonomyReader.ChildrenIterator;
|
||||||
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
|
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
|
||||||
|
@ -248,11 +249,11 @@ public class TestDirectoryTaxonomyReader extends FacetTestCase {
|
||||||
// assert categories
|
// assert categories
|
||||||
assertEquals(numCategories, reader.getSize());
|
assertEquals(numCategories, reader.getSize());
|
||||||
int roundOrdinal = reader.getOrdinal(new FacetLabel(Integer.toString(i)));
|
int roundOrdinal = reader.getOrdinal(new FacetLabel(Integer.toString(i)));
|
||||||
int[] parents = reader.getParallelTaxonomyArrays().parents();
|
ParallelTaxonomyArrays.IntArray parents = reader.getParallelTaxonomyArrays().parents();
|
||||||
assertEquals(0, parents[roundOrdinal]); // round's parent is root
|
assertEquals(0, parents.get(roundOrdinal)); // round's parent is root
|
||||||
for (int j = 0; j < numCats; j++) {
|
for (int j = 0; j < numCats; j++) {
|
||||||
int ord = reader.getOrdinal(new FacetLabel(Integer.toString(i), Integer.toString(j)));
|
int ord = reader.getOrdinal(new FacetLabel(Integer.toString(i), Integer.toString(j)));
|
||||||
assertEquals(roundOrdinal, parents[ord]); // round's parent is root
|
assertEquals(roundOrdinal, parents.get(ord)); // round's parent is root
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -286,7 +287,7 @@ public class TestDirectoryTaxonomyReader extends FacetTestCase {
|
||||||
|
|
||||||
TaxonomyReader reader = new DirectoryTaxonomyReader(writer);
|
TaxonomyReader reader = new DirectoryTaxonomyReader(writer);
|
||||||
assertEquals(1, reader.getSize());
|
assertEquals(1, reader.getSize());
|
||||||
assertEquals(1, reader.getParallelTaxonomyArrays().parents().length);
|
assertEquals(1, reader.getParallelTaxonomyArrays().parents().length());
|
||||||
|
|
||||||
// add category and call forceMerge -- this should flush IW and merge segments down to 1
|
// add category and call forceMerge -- this should flush IW and merge segments down to 1
|
||||||
// in ParentArray.initFromReader, this used to fail assuming there are no parents.
|
// in ParentArray.initFromReader, this used to fail assuming there are no parents.
|
||||||
|
@ -299,7 +300,7 @@ public class TestDirectoryTaxonomyReader extends FacetTestCase {
|
||||||
reader.close();
|
reader.close();
|
||||||
reader = newtr;
|
reader = newtr;
|
||||||
assertEquals(2, reader.getSize());
|
assertEquals(2, reader.getSize());
|
||||||
assertEquals(2, reader.getParallelTaxonomyArrays().parents().length);
|
assertEquals(2, reader.getParallelTaxonomyArrays().parents().length());
|
||||||
|
|
||||||
reader.close();
|
reader.close();
|
||||||
writer.close();
|
writer.close();
|
||||||
|
@ -336,7 +337,7 @@ public class TestDirectoryTaxonomyReader extends FacetTestCase {
|
||||||
|
|
||||||
TaxonomyReader reader = new DirectoryTaxonomyReader(writer);
|
TaxonomyReader reader = new DirectoryTaxonomyReader(writer);
|
||||||
assertEquals(2, reader.getSize());
|
assertEquals(2, reader.getSize());
|
||||||
assertEquals(2, reader.getParallelTaxonomyArrays().parents().length);
|
assertEquals(2, reader.getParallelTaxonomyArrays().parents().length());
|
||||||
|
|
||||||
// merge all the segments so that NRT reader thinks there's a change
|
// merge all the segments so that NRT reader thinks there's a change
|
||||||
iw.forceMerge(1);
|
iw.forceMerge(1);
|
||||||
|
@ -347,7 +348,7 @@ public class TestDirectoryTaxonomyReader extends FacetTestCase {
|
||||||
reader.close();
|
reader.close();
|
||||||
reader = newtr;
|
reader = newtr;
|
||||||
assertEquals(2, reader.getSize());
|
assertEquals(2, reader.getSize());
|
||||||
assertEquals(2, reader.getParallelTaxonomyArrays().parents().length);
|
assertEquals(2, reader.getParallelTaxonomyArrays().parents().length());
|
||||||
|
|
||||||
reader.close();
|
reader.close();
|
||||||
writer.close();
|
writer.close();
|
||||||
|
|
|
@ -29,6 +29,7 @@ import org.apache.lucene.facet.FacetField;
|
||||||
import org.apache.lucene.facet.FacetTestCase;
|
import org.apache.lucene.facet.FacetTestCase;
|
||||||
import org.apache.lucene.facet.FacetsConfig;
|
import org.apache.lucene.facet.FacetsConfig;
|
||||||
import org.apache.lucene.facet.taxonomy.FacetLabel;
|
import org.apache.lucene.facet.taxonomy.FacetLabel;
|
||||||
|
import org.apache.lucene.facet.taxonomy.ParallelTaxonomyArrays;
|
||||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||||
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.MemoryOrdinalMap;
|
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.MemoryOrdinalMap;
|
||||||
import org.apache.lucene.facet.taxonomy.writercache.LruTaxonomyWriterCache;
|
import org.apache.lucene.facet.taxonomy.writercache.LruTaxonomyWriterCache;
|
||||||
|
@ -366,7 +367,7 @@ public class TestDirectoryTaxonomyWriter extends FacetTestCase {
|
||||||
fail("mismatch number of categories");
|
fail("mismatch number of categories");
|
||||||
}
|
}
|
||||||
|
|
||||||
int[] parents = dtr.getParallelTaxonomyArrays().parents();
|
ParallelTaxonomyArrays.IntArray parents = dtr.getParallelTaxonomyArrays().parents();
|
||||||
for (String cat : values.keySet()) {
|
for (String cat : values.keySet()) {
|
||||||
FacetLabel cp = new FacetLabel(FacetsConfig.stringToPath(cat));
|
FacetLabel cp = new FacetLabel(FacetsConfig.stringToPath(cat));
|
||||||
assertTrue("category not found " + cp, dtr.getOrdinal(cp) > 0);
|
assertTrue("category not found " + cp, dtr.getOrdinal(cp) > 0);
|
||||||
|
@ -376,7 +377,7 @@ public class TestDirectoryTaxonomyWriter extends FacetTestCase {
|
||||||
for (int i = 0; i < level; i++) {
|
for (int i = 0; i < level; i++) {
|
||||||
path = cp.subpath(i + 1);
|
path = cp.subpath(i + 1);
|
||||||
int ord = dtr.getOrdinal(path);
|
int ord = dtr.getOrdinal(path);
|
||||||
assertEquals("invalid parent for cp=" + path, parentOrd, parents[ord]);
|
assertEquals("invalid parent for cp=" + path, parentOrd, parents.get(ord));
|
||||||
parentOrd = ord; // next level should have this parent
|
parentOrd = ord; // next level should have this parent
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,62 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
package org.apache.lucene.facet.taxonomy.directory;
|
||||||
|
|
||||||
|
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||||
|
import org.apache.lucene.tests.util.LuceneTestCase;
|
||||||
|
|
||||||
|
public class TestTaxonomyIndexArrays extends LuceneTestCase {
|
||||||
|
|
||||||
|
private void checkInvariants(TaxonomyIndexArrays oldArray, TaxonomyIndexArrays newArray) {
|
||||||
|
TaxonomyIndexArrays.ChunkedIntArray oldParents = oldArray.parents();
|
||||||
|
TaxonomyIndexArrays.ChunkedIntArray newParents = newArray.parents();
|
||||||
|
for (int i = 0; i < oldParents.values.length - 1; i++) {
|
||||||
|
assertSame(oldParents.values[i], newParents.values[i]);
|
||||||
|
}
|
||||||
|
int lastOldChunk = oldParents.values.length - 1;
|
||||||
|
for (int i = 0; i < oldParents.values[lastOldChunk].length; i++) {
|
||||||
|
assertEquals(oldParents.values[lastOldChunk][i], newParents.values[lastOldChunk][i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testRandom() {
|
||||||
|
TaxonomyIndexArrays oldArray =
|
||||||
|
new TaxonomyIndexArrays(new int[][] {new int[] {TaxonomyReader.INVALID_ORDINAL}});
|
||||||
|
int numIterations = 100;
|
||||||
|
int ordinal = 1;
|
||||||
|
for (int i = 0; i < numIterations; i++) {
|
||||||
|
int newOrdinal = ordinal + random().nextInt(TaxonomyIndexArrays.CHUNK_SIZE);
|
||||||
|
TaxonomyIndexArrays newArray = oldArray.add(newOrdinal, ordinal);
|
||||||
|
checkInvariants(oldArray, newArray);
|
||||||
|
ordinal = newOrdinal;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testMultiplesOfChunkSize() {
|
||||||
|
TaxonomyIndexArrays oldArray =
|
||||||
|
new TaxonomyIndexArrays(new int[][] {new int[] {TaxonomyReader.INVALID_ORDINAL}});
|
||||||
|
int numIterations = 20;
|
||||||
|
int ordinal = TaxonomyIndexArrays.CHUNK_SIZE;
|
||||||
|
for (int i = 0; i < numIterations; i++) {
|
||||||
|
int newOrdinal = ordinal + TaxonomyIndexArrays.CHUNK_SIZE;
|
||||||
|
TaxonomyIndexArrays newArray = oldArray.add(newOrdinal, ordinal);
|
||||||
|
checkInvariants(oldArray, newArray);
|
||||||
|
ordinal = newOrdinal;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue