mirror of https://github.com/apache/lucene.git
LUCENE-10250: Add support for arbitrary length hierarchical SSDV facets (#509)
This commit is contained in:
parent
12c526595c
commit
c7650cdec2
|
@ -66,6 +66,8 @@ New Features
|
|||
* LUCENE-10335: Add ModuleResourceLoader as complement to ClasspathResourceLoader.
|
||||
(Uwe Schindler)
|
||||
|
||||
* LUCENE-10250: Add support for arbitrary length hierarchical SSDV facets. (Marc D'mello)
|
||||
|
||||
Improvements
|
||||
---------------------
|
||||
|
||||
|
|
|
@ -478,19 +478,35 @@ public class FacetsConfig {
|
|||
|
||||
private void processSSDVFacetFields(
|
||||
Map<String, List<SortedSetDocValuesFacetField>> byField, Document doc) {
|
||||
|
||||
for (Map.Entry<String, List<SortedSetDocValuesFacetField>> ent : byField.entrySet()) {
|
||||
|
||||
String indexFieldName = ent.getKey();
|
||||
|
||||
for (SortedSetDocValuesFacetField facetField : ent.getValue()) {
|
||||
FacetLabel facetLabel = new FacetLabel(facetField.dim, facetField.label);
|
||||
String fullPath = pathToString(facetLabel.components, facetLabel.length);
|
||||
|
||||
// For facet counts:
|
||||
doc.add(new SortedSetDocValuesField(indexFieldName, new BytesRef(fullPath)));
|
||||
|
||||
FacetLabel facetLabel = new FacetLabel(facetField.dim, facetField.path);
|
||||
DimConfig dimConfig = getDimConfig(facetField.dim);
|
||||
if (dimConfig.hierarchical) {
|
||||
for (int i = 0; i < facetLabel.length; i++) {
|
||||
String fullPath = pathToString(facetLabel.components, i + 1);
|
||||
// For facet counts:
|
||||
doc.add(new SortedSetDocValuesField(indexFieldName, new BytesRef(fullPath)));
|
||||
}
|
||||
} else {
|
||||
if (facetLabel.length != 2) {
|
||||
throw new IllegalArgumentException(
|
||||
"dimension \""
|
||||
+ facetField.dim
|
||||
+ "\" is not hierarchical yet has "
|
||||
+ facetField.path.length
|
||||
+ " components");
|
||||
}
|
||||
String fullPath = pathToString(facetLabel.components, facetLabel.length);
|
||||
// For facet counts:
|
||||
doc.add(new SortedSetDocValuesField(indexFieldName, new BytesRef(fullPath)));
|
||||
}
|
||||
// For drill-down:
|
||||
indexDrillDownTerms(doc, indexFieldName, getDimConfig(facetField.dim), facetLabel);
|
||||
indexDrillDownTerms(doc, indexFieldName, dimConfig, facetLabel);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -599,7 +615,7 @@ public class FacetsConfig {
|
|||
private static final char ESCAPE_CHAR = '\u001E';
|
||||
|
||||
/** Turns a dim + path into an encoded string. */
|
||||
public static String pathToString(String dim, String[] path) {
|
||||
public static String pathToString(String dim, String... path) {
|
||||
String[] fullPath = new String[1 + path.length];
|
||||
fullPath[0] = dim;
|
||||
System.arraycopy(path, 0, fullPath, 1, path.length);
|
||||
|
|
|
@ -22,7 +22,8 @@ import java.util.Arrays;
|
|||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.PrimitiveIterator;
|
||||
import java.util.concurrent.Callable;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
|
@ -64,10 +65,13 @@ public class ConcurrentSortedSetDocValuesFacetCounts extends Facets {
|
|||
|
||||
final ExecutorService exec;
|
||||
final SortedSetDocValuesReaderState state;
|
||||
final FacetsConfig stateConfig;
|
||||
final SortedSetDocValues dv;
|
||||
final String field;
|
||||
final AtomicIntegerArray counts;
|
||||
|
||||
private static final String[] emptyPath = new String[0];
|
||||
|
||||
/** Returns all facet counts, same result as searching on {@link MatchAllDocsQuery} but faster. */
|
||||
public ConcurrentSortedSetDocValuesFacetCounts(
|
||||
SortedSetDocValuesReaderState state, ExecutorService exec)
|
||||
|
@ -81,6 +85,7 @@ public class ConcurrentSortedSetDocValuesFacetCounts extends Facets {
|
|||
throws IOException, InterruptedException {
|
||||
this.state = state;
|
||||
this.field = state.getField();
|
||||
this.stateConfig = Objects.requireNonNullElse(state.getFacetsConfig(), new FacetsConfig());
|
||||
this.exec = exec;
|
||||
dv = state.getDocValues();
|
||||
counts = new AtomicIntegerArray(state.getSize());
|
||||
|
@ -97,17 +102,32 @@ public class ConcurrentSortedSetDocValuesFacetCounts extends Facets {
|
|||
if (topN <= 0) {
|
||||
throw new IllegalArgumentException("topN must be > 0 (got: " + topN + ")");
|
||||
}
|
||||
if (path.length > 0) {
|
||||
throw new IllegalArgumentException("path should be 0 length");
|
||||
|
||||
if (stateConfig.getDimConfig(dim).hierarchical) {
|
||||
int pathOrd = (int) dv.lookupTerm(new BytesRef(FacetsConfig.pathToString(dim, path)));
|
||||
if (pathOrd < 0) {
|
||||
// path was never indexed
|
||||
return null;
|
||||
}
|
||||
SortedSetDocValuesReaderState.DimTree dimTree = state.getDimTree(dim);
|
||||
return getDim(dim, path, pathOrd, dimTree.iterator(pathOrd), topN);
|
||||
} else {
|
||||
if (path.length > 0) {
|
||||
throw new IllegalArgumentException(
|
||||
"Field is not configured as hierarchical, path should be 0 length");
|
||||
}
|
||||
OrdRange ordRange = state.getOrdRange(dim);
|
||||
if (ordRange == null) {
|
||||
// means dimension was never indexed
|
||||
return null;
|
||||
}
|
||||
return getDim(dim, null, -1, ordRange.iterator(), topN);
|
||||
}
|
||||
OrdRange ordRange = state.getOrdRange(dim);
|
||||
if (ordRange == null) {
|
||||
return null; // means dimension was never indexed
|
||||
}
|
||||
return getDim(dim, ordRange, topN);
|
||||
}
|
||||
|
||||
private FacetResult getDim(String dim, OrdRange ordRange, int topN) throws IOException {
|
||||
private FacetResult getDim(
|
||||
String dim, String[] path, int pathOrd, PrimitiveIterator.OfInt childOrds, int topN)
|
||||
throws IOException {
|
||||
|
||||
TopOrdAndIntQueue q = null;
|
||||
|
||||
|
@ -118,7 +138,8 @@ public class ConcurrentSortedSetDocValuesFacetCounts extends Facets {
|
|||
|
||||
TopOrdAndIntQueue.OrdAndValue reuse = null;
|
||||
|
||||
for (int ord = ordRange.start; ord <= ordRange.end; ord++) {
|
||||
while (childOrds.hasNext()) {
|
||||
int ord = childOrds.next();
|
||||
if (counts.get(ord) > 0) {
|
||||
dimCount += counts.get(ord);
|
||||
childCount++;
|
||||
|
@ -148,12 +169,19 @@ public class ConcurrentSortedSetDocValuesFacetCounts extends Facets {
|
|||
LabelAndValue[] labelValues = new LabelAndValue[q.size()];
|
||||
for (int i = labelValues.length - 1; i >= 0; i--) {
|
||||
TopOrdAndIntQueue.OrdAndValue ordAndValue = q.pop();
|
||||
assert ordAndValue != null;
|
||||
final BytesRef term = dv.lookupOrd(ordAndValue.ord);
|
||||
String[] parts = FacetsConfig.stringToPath(term.utf8ToString());
|
||||
labelValues[i] = new LabelAndValue(parts[1], ordAndValue.value);
|
||||
labelValues[i] = new LabelAndValue(parts[parts.length - 1], ordAndValue.value);
|
||||
}
|
||||
|
||||
return new FacetResult(dim, new String[0], dimCount, labelValues, childCount);
|
||||
if (pathOrd == -1) {
|
||||
// not hierarchical facet
|
||||
return new FacetResult(dim, emptyPath, dimCount, labelValues, childCount);
|
||||
} else {
|
||||
// hierarchical facet
|
||||
return new FacetResult(dim, path, counts.get(pathOrd), labelValues, childCount);
|
||||
}
|
||||
}
|
||||
|
||||
private class CountOneSegment implements Callable<Void> {
|
||||
|
@ -365,10 +393,19 @@ public class ConcurrentSortedSetDocValuesFacetCounts extends Facets {
|
|||
public List<FacetResult> getAllDims(int topN) throws IOException {
|
||||
|
||||
List<FacetResult> results = new ArrayList<>();
|
||||
for (Map.Entry<String, OrdRange> ent : state.getPrefixToOrdRange().entrySet()) {
|
||||
FacetResult fr = getDim(ent.getKey(), ent.getValue(), topN);
|
||||
if (fr != null) {
|
||||
results.add(fr);
|
||||
for (String dim : state.getDims()) {
|
||||
if (stateConfig.getDimConfig(dim).hierarchical) {
|
||||
SortedSetDocValuesReaderState.DimTree dimTree = state.getDimTree(dim);
|
||||
FacetResult fr = getDim(dim, emptyPath, dimTree.dimStartOrd, dimTree.iterator(), topN);
|
||||
if (fr != null) {
|
||||
results.add(fr);
|
||||
}
|
||||
} else {
|
||||
OrdRange ordRange = state.getOrdRange(dim);
|
||||
FacetResult fr = getDim(dim, emptyPath, -1, ordRange.iterator(), topN);
|
||||
if (fr != null) {
|
||||
results.add(fr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -17,12 +17,15 @@
|
|||
package org.apache.lucene.facet.sortedset;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Stack;
|
||||
import org.apache.lucene.facet.FacetsConfig;
|
||||
import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState.OrdRange;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.DocValuesType;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
|
@ -51,20 +54,42 @@ public class DefaultSortedSetDocValuesReaderState extends SortedSetDocValuesRead
|
|||
|
||||
private final Map<String, OrdinalMap> cachedOrdMaps = new HashMap<>();
|
||||
|
||||
private final FacetsConfig config;
|
||||
|
||||
/** Used for hierarchical dims. */
|
||||
private final Map<String, DimTree> prefixToDimTree = new HashMap<>();
|
||||
|
||||
/** Used for flat dims. */
|
||||
private final Map<String, OrdRange> prefixToOrdRange = new HashMap<>();
|
||||
|
||||
/**
|
||||
* Creates this, pulling doc values from the default {@link
|
||||
* Creates this with a config, pulling doc values from the default {@link
|
||||
* FacetsConfig#DEFAULT_INDEX_FIELD_NAME}.
|
||||
*/
|
||||
public DefaultSortedSetDocValuesReaderState(IndexReader reader, FacetsConfig config)
|
||||
throws IOException {
|
||||
this(reader, FacetsConfig.DEFAULT_INDEX_FIELD_NAME, config);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates this without a config, pulling doc values from the default {@link
|
||||
* FacetsConfig#DEFAULT_INDEX_FIELD_NAME}.
|
||||
*/
|
||||
public DefaultSortedSetDocValuesReaderState(IndexReader reader) throws IOException {
|
||||
this(reader, FacetsConfig.DEFAULT_INDEX_FIELD_NAME);
|
||||
this(reader, FacetsConfig.DEFAULT_INDEX_FIELD_NAME, null);
|
||||
}
|
||||
|
||||
/** Creates this without a config, pulling doc values from the specified field. */
|
||||
public DefaultSortedSetDocValuesReaderState(IndexReader reader, String field) throws IOException {
|
||||
this(reader, field, null);
|
||||
}
|
||||
|
||||
/** Creates this, pulling doc values from the specified field. */
|
||||
public DefaultSortedSetDocValuesReaderState(IndexReader reader, String field) throws IOException {
|
||||
public DefaultSortedSetDocValuesReaderState(IndexReader reader, String field, FacetsConfig config)
|
||||
throws IOException {
|
||||
this.field = field;
|
||||
this.reader = reader;
|
||||
this.config = config;
|
||||
|
||||
// We need this to create thread-safe MultiSortedSetDV
|
||||
// per collector:
|
||||
|
@ -79,38 +104,139 @@ public class DefaultSortedSetDocValuesReaderState extends SortedSetDocValuesRead
|
|||
}
|
||||
valueCount = (int) dv.getValueCount();
|
||||
|
||||
// TODO: we can make this more efficient if eg we can be
|
||||
// "involved" when OrdinalMap is being created? Ie see
|
||||
// each term/ord it's assigning as it goes...
|
||||
String lastDim = null;
|
||||
int startOrd = -1;
|
||||
|
||||
// TODO: this approach can work for full hierarchy?;
|
||||
// TaxoReader can't do this since ords are not in
|
||||
// "sorted order" ... but we should generalize this to
|
||||
// support arbitrary hierarchy:
|
||||
for (int ord = 0; ord < valueCount; ord++) {
|
||||
final BytesRef term = dv.lookupOrd(ord);
|
||||
int ord = 0;
|
||||
while (ord != valueCount) {
|
||||
BytesRef term = dv.lookupOrd(ord);
|
||||
String[] components = FacetsConfig.stringToPath(term.utf8ToString());
|
||||
if (components.length != 2) {
|
||||
throw new IllegalArgumentException(
|
||||
"this class can only handle 2 level hierarchy (dim/value); got: "
|
||||
+ Arrays.toString(components)
|
||||
+ " "
|
||||
+ term.utf8ToString());
|
||||
String dim = components[0];
|
||||
if (config != null && config.getDimConfig(dim).hierarchical) {
|
||||
ord = createOneHierarchicalFacetDimState(dv, ord) + 1;
|
||||
} else {
|
||||
ord = createOneFlatFacetDimState(dv, ord) + 1;
|
||||
}
|
||||
if (!components[0].equals(lastDim)) {
|
||||
if (lastDim != null) {
|
||||
prefixToOrdRange.put(lastDim, new OrdRange(startOrd, ord - 1));
|
||||
}
|
||||
}
|
||||
|
||||
// returns last ord of dimension
|
||||
private int createOneHierarchicalFacetDimState(SortedSetDocValues dv, int dimStartOrd)
|
||||
throws IOException {
|
||||
List<Boolean> hasChildren = new ArrayList<>();
|
||||
List<Integer> siblings = new ArrayList<>();
|
||||
|
||||
// stack of paths with unfulfilled siblings
|
||||
Stack<OrdAndComponent> siblingStack = new Stack<>();
|
||||
|
||||
int dimEndOrd = dimStartOrd;
|
||||
|
||||
BytesRef nextTerm = dv.lookupOrd(dimEndOrd);
|
||||
String[] nextComponents = FacetsConfig.stringToPath(nextTerm.utf8ToString());
|
||||
String dim = nextComponents[0];
|
||||
|
||||
while (true) {
|
||||
String[] components = nextComponents;
|
||||
|
||||
int ord = dimEndOrd - dimStartOrd;
|
||||
|
||||
while (siblingStack.empty() == false
|
||||
&& siblingStack.peek().component.length >= components.length) {
|
||||
OrdAndComponent possibleSibling = siblingStack.pop();
|
||||
if (possibleSibling.component.length == components.length) {
|
||||
// lengths are equal, all non-siblings of equal length will have already been popped off
|
||||
// so this must be sibling
|
||||
siblings.set(possibleSibling.ord, ord);
|
||||
}
|
||||
startOrd = ord;
|
||||
lastDim = components[0];
|
||||
}
|
||||
|
||||
if (dimEndOrd + 1 == valueCount) {
|
||||
// current ord needs to be added, can't have children or siblings
|
||||
siblings.add(-1);
|
||||
hasChildren.add(false);
|
||||
break;
|
||||
}
|
||||
|
||||
nextTerm = dv.lookupOrd(dimEndOrd + 1);
|
||||
nextComponents = FacetsConfig.stringToPath(nextTerm.utf8ToString());
|
||||
|
||||
if (nextComponents[0].equals(components[0]) == false) {
|
||||
// current ord needs to be added, can't have children or siblings
|
||||
siblings.add(-1);
|
||||
hasChildren.add(false);
|
||||
break;
|
||||
}
|
||||
|
||||
if (components.length < nextComponents.length) {
|
||||
// next ord must be a direct child of current ord, this is because we are indexing all
|
||||
// ancestral paths
|
||||
hasChildren.add(ord, true);
|
||||
// we don't know if this ord has a sibling or where its sibling could be yet
|
||||
siblingStack.push(new OrdAndComponent(ord, components));
|
||||
// we still add INVALID_ORDINAL, which will be replaced if a valid sibling is found
|
||||
siblings.add(ord, INVALID_ORDINAL);
|
||||
} else if (components.length == nextComponents.length) {
|
||||
// next ord must be a sibling of current and there are no direct children of current, this
|
||||
// is because we
|
||||
// are indexing all ancestral paths
|
||||
siblings.add(ord, ord + 1);
|
||||
hasChildren.add(ord, false);
|
||||
} else {
|
||||
// components.length > nextComponents.length
|
||||
// next ord is neither sibling nor child
|
||||
siblings.add(ord, INVALID_ORDINAL);
|
||||
hasChildren.add(ord, false);
|
||||
}
|
||||
|
||||
dimEndOrd++;
|
||||
}
|
||||
|
||||
if (lastDim != null) {
|
||||
prefixToOrdRange.put(lastDim, new OrdRange(startOrd, valueCount - 1));
|
||||
prefixToDimTree.put(dim, new DimTree(dimStartOrd, siblings, hasChildren));
|
||||
|
||||
return dimEndOrd;
|
||||
}
|
||||
|
||||
// returns last ord of dimension
|
||||
private int createOneFlatFacetDimState(SortedSetDocValues dv, int dimStartOrd)
|
||||
throws IOException {
|
||||
|
||||
int dimEndOrd = dimStartOrd;
|
||||
|
||||
BytesRef nextTerm = dv.lookupOrd(dimEndOrd);
|
||||
String[] nextComponents = FacetsConfig.stringToPath(nextTerm.utf8ToString());
|
||||
if (nextComponents.length != 2) {
|
||||
throw new IllegalArgumentException(
|
||||
"dimension not configured to handle hierarchical field; got: "
|
||||
+ Arrays.toString(nextComponents)
|
||||
+ " "
|
||||
+ nextTerm.utf8ToString());
|
||||
}
|
||||
String dim = nextComponents[0];
|
||||
|
||||
while (true) {
|
||||
String[] components = nextComponents;
|
||||
|
||||
if (dimEndOrd + 1 == valueCount) {
|
||||
break;
|
||||
}
|
||||
|
||||
nextTerm = dv.lookupOrd(dimEndOrd + 1);
|
||||
nextComponents = FacetsConfig.stringToPath(nextTerm.utf8ToString());
|
||||
|
||||
if (nextComponents[0].equals(components[0]) == false) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (nextComponents.length != 2) {
|
||||
throw new IllegalArgumentException(
|
||||
"dimension not configured to handle hierarchical field; got: "
|
||||
+ Arrays.toString(nextComponents)
|
||||
+ " "
|
||||
+ nextTerm.utf8ToString());
|
||||
}
|
||||
|
||||
dimEndOrd++;
|
||||
}
|
||||
prefixToOrdRange.put(dim, new OrdRange(dimStartOrd, dimEndOrd));
|
||||
|
||||
return dimEndOrd;
|
||||
}
|
||||
|
||||
/** Return the memory usage of this object in bytes. Negative values are illegal. */
|
||||
|
@ -194,18 +320,6 @@ public class DefaultSortedSetDocValuesReaderState extends SortedSetDocValuesRead
|
|||
return new MultiSortedSetDocValues(values, starts, map, cost);
|
||||
}
|
||||
|
||||
/** Returns mapping from prefix to {@link OrdRange}. */
|
||||
@Override
|
||||
public Map<String, OrdRange> getPrefixToOrdRange() {
|
||||
return prefixToOrdRange;
|
||||
}
|
||||
|
||||
/** Returns the {@link OrdRange} for this dimension. */
|
||||
@Override
|
||||
public OrdRange getOrdRange(String dim) {
|
||||
return prefixToOrdRange.get(dim);
|
||||
}
|
||||
|
||||
/** Indexed field we are reading. */
|
||||
@Override
|
||||
public String getField() {
|
||||
|
@ -222,4 +336,72 @@ public class DefaultSortedSetDocValuesReaderState extends SortedSetDocValuesRead
|
|||
public int getSize() {
|
||||
return valueCount;
|
||||
}
|
||||
|
||||
@Override
|
||||
public FacetsConfig getFacetsConfig() {
|
||||
return config;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iterable<String> getDims() {
|
||||
return () ->
|
||||
new Iterator<>() {
|
||||
|
||||
final Iterator<String> dimTreeIterator = prefixToDimTree.keySet().iterator();
|
||||
final Iterator<String> ordRangeIterator = prefixToOrdRange.keySet().iterator();
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return ordRangeIterator.hasNext() || dimTreeIterator.hasNext();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String next() {
|
||||
if (dimTreeIterator.hasNext()) {
|
||||
return dimTreeIterator.next();
|
||||
} else if (ordRangeIterator.hasNext()) {
|
||||
return ordRangeIterator.next();
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/* Flat facet operations */
|
||||
|
||||
@Override
|
||||
public Map<String, OrdRange> getPrefixToOrdRange() {
|
||||
return prefixToOrdRange;
|
||||
}
|
||||
|
||||
@Override
|
||||
public OrdRange getOrdRange(String dim) {
|
||||
if (config != null && config.getDimConfig(dim).hierarchical) {
|
||||
throw new UnsupportedOperationException(
|
||||
"This operation is only supported for flat dimensions");
|
||||
}
|
||||
return prefixToOrdRange.get(dim);
|
||||
}
|
||||
|
||||
/* Hierarchical facet operations */
|
||||
|
||||
@Override
|
||||
public DimTree getDimTree(String dim) {
|
||||
if (config == null || config.getDimConfig(dim).hierarchical == false) {
|
||||
throw new UnsupportedOperationException(
|
||||
"This opperation is only supported for hierarchical facets");
|
||||
}
|
||||
return prefixToDimTree.get(dim);
|
||||
}
|
||||
|
||||
private static final class OrdAndComponent {
|
||||
int ord;
|
||||
String[] component;
|
||||
|
||||
public OrdAndComponent(int ord, String[] component) {
|
||||
this.ord = ord;
|
||||
this.component = component;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -22,7 +22,8 @@ import java.util.Arrays;
|
|||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.PrimitiveIterator;
|
||||
import org.apache.lucene.facet.FacetResult;
|
||||
import org.apache.lucene.facet.FacetUtils;
|
||||
import org.apache.lucene.facet.Facets;
|
||||
|
@ -31,6 +32,7 @@ import org.apache.lucene.facet.FacetsCollector.MatchingDocs;
|
|||
import org.apache.lucene.facet.FacetsConfig;
|
||||
import org.apache.lucene.facet.LabelAndValue;
|
||||
import org.apache.lucene.facet.TopOrdAndIntQueue;
|
||||
import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState.DimTree;
|
||||
import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState.OrdRange;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
|
@ -66,10 +68,13 @@ import org.apache.lucene.util.LongValues;
|
|||
public class SortedSetDocValuesFacetCounts extends Facets {
|
||||
|
||||
final SortedSetDocValuesReaderState state;
|
||||
final FacetsConfig stateConfig;
|
||||
final SortedSetDocValues dv;
|
||||
final String field;
|
||||
final int[] counts;
|
||||
|
||||
private static final String[] emptyPath = new String[0];
|
||||
|
||||
/** Returns all facet counts, same result as searching on {@link MatchAllDocsQuery} but faster. */
|
||||
public SortedSetDocValuesFacetCounts(SortedSetDocValuesReaderState state) throws IOException {
|
||||
this(state, null);
|
||||
|
@ -80,8 +85,9 @@ public class SortedSetDocValuesFacetCounts extends Facets {
|
|||
throws IOException {
|
||||
this.state = state;
|
||||
this.field = state.getField();
|
||||
dv = state.getDocValues();
|
||||
counts = new int[state.getSize()];
|
||||
this.stateConfig = Objects.requireNonNullElse(state.getFacetsConfig(), new FacetsConfig());
|
||||
this.dv = state.getDocValues();
|
||||
this.counts = new int[state.getSize()];
|
||||
if (hits == null) {
|
||||
// browse only
|
||||
countAll();
|
||||
|
@ -95,17 +101,32 @@ public class SortedSetDocValuesFacetCounts extends Facets {
|
|||
if (topN <= 0) {
|
||||
throw new IllegalArgumentException("topN must be > 0 (got: " + topN + ")");
|
||||
}
|
||||
if (path.length > 0) {
|
||||
throw new IllegalArgumentException("path should be 0 length");
|
||||
|
||||
if (stateConfig.getDimConfig(dim).hierarchical) {
|
||||
int pathOrd = (int) dv.lookupTerm(new BytesRef(FacetsConfig.pathToString(dim, path)));
|
||||
if (pathOrd < 0) {
|
||||
// path was never indexed
|
||||
return null;
|
||||
}
|
||||
DimTree dimTree = state.getDimTree(dim);
|
||||
return getDim(dim, path, pathOrd, dimTree.iterator(pathOrd), topN);
|
||||
} else {
|
||||
if (path.length > 0) {
|
||||
throw new IllegalArgumentException(
|
||||
"Field is not configured as hierarchical, path should be 0 length");
|
||||
}
|
||||
OrdRange ordRange = state.getOrdRange(dim);
|
||||
if (ordRange == null) {
|
||||
// means dimension was never indexed
|
||||
return null;
|
||||
}
|
||||
return getDim(dim, null, -1, ordRange.iterator(), topN);
|
||||
}
|
||||
OrdRange ordRange = state.getOrdRange(dim);
|
||||
if (ordRange == null) {
|
||||
return null; // means dimension was never indexed
|
||||
}
|
||||
return getDim(dim, ordRange, topN);
|
||||
}
|
||||
|
||||
private FacetResult getDim(String dim, OrdRange ordRange, int topN) throws IOException {
|
||||
private FacetResult getDim(
|
||||
String dim, String[] path, int pathOrd, PrimitiveIterator.OfInt childOrds, int topN)
|
||||
throws IOException {
|
||||
|
||||
TopOrdAndIntQueue q = null;
|
||||
|
||||
|
@ -115,7 +136,8 @@ public class SortedSetDocValuesFacetCounts extends Facets {
|
|||
int childCount = 0;
|
||||
|
||||
TopOrdAndIntQueue.OrdAndValue reuse = null;
|
||||
for (int ord = ordRange.start; ord <= ordRange.end; ord++) {
|
||||
while (childOrds.hasNext()) {
|
||||
int ord = childOrds.next();
|
||||
if (counts[ord] > 0) {
|
||||
dimCount += counts[ord];
|
||||
childCount++;
|
||||
|
@ -145,12 +167,19 @@ public class SortedSetDocValuesFacetCounts extends Facets {
|
|||
LabelAndValue[] labelValues = new LabelAndValue[q.size()];
|
||||
for (int i = labelValues.length - 1; i >= 0; i--) {
|
||||
TopOrdAndIntQueue.OrdAndValue ordAndValue = q.pop();
|
||||
assert ordAndValue != null;
|
||||
final BytesRef term = dv.lookupOrd(ordAndValue.ord);
|
||||
String[] parts = FacetsConfig.stringToPath(term.utf8ToString());
|
||||
labelValues[i] = new LabelAndValue(parts[1], ordAndValue.value);
|
||||
labelValues[i] = new LabelAndValue(parts[parts.length - 1], ordAndValue.value);
|
||||
}
|
||||
|
||||
return new FacetResult(dim, new String[0], dimCount, labelValues, childCount);
|
||||
if (pathOrd == -1) {
|
||||
// not hierarchical facet
|
||||
return new FacetResult(dim, emptyPath, dimCount, labelValues, childCount);
|
||||
} else {
|
||||
// hierarchical facet
|
||||
return new FacetResult(dim, path, counts[pathOrd], labelValues, childCount);
|
||||
}
|
||||
}
|
||||
|
||||
private void countOneSegment(
|
||||
|
@ -317,10 +346,19 @@ public class SortedSetDocValuesFacetCounts extends Facets {
|
|||
public List<FacetResult> getAllDims(int topN) throws IOException {
|
||||
|
||||
List<FacetResult> results = new ArrayList<>();
|
||||
for (Map.Entry<String, OrdRange> ent : state.getPrefixToOrdRange().entrySet()) {
|
||||
FacetResult fr = getDim(ent.getKey(), ent.getValue(), topN);
|
||||
if (fr != null) {
|
||||
results.add(fr);
|
||||
for (String dim : state.getDims()) {
|
||||
if (stateConfig.getDimConfig(dim).hierarchical) {
|
||||
DimTree dimTree = state.getDimTree(dim);
|
||||
FacetResult fr = getDim(dim, emptyPath, dimTree.dimStartOrd, dimTree.iterator(), topN);
|
||||
if (fr != null) {
|
||||
results.add(fr);
|
||||
}
|
||||
} else {
|
||||
OrdRange ordRange = state.getOrdRange(dim);
|
||||
FacetResult fr = getDim(dim, emptyPath, -1, ordRange.iterator(), topN);
|
||||
if (fr != null) {
|
||||
results.add(fr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.lucene.facet.sortedset;
|
|||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.facet.FacetField;
|
||||
import org.apache.lucene.facet.FacetsConfig;
|
||||
import org.apache.lucene.index.IndexOptions;
|
||||
|
||||
/**
|
||||
|
@ -40,20 +41,39 @@ public class SortedSetDocValuesFacetField extends Field {
|
|||
/** Dimension. */
|
||||
public final String dim;
|
||||
|
||||
/** Label. */
|
||||
public final String label;
|
||||
/** Path. */
|
||||
public final String[] path;
|
||||
|
||||
/**
|
||||
* String form of path.
|
||||
*
|
||||
* @deprecated This field will be removed in a future version. {@link
|
||||
* FacetsConfig#pathToString(String[])} can be applied to {@code path} as a replacement if
|
||||
* string path is desired.
|
||||
*/
|
||||
@Deprecated public final String label;
|
||||
|
||||
/** Sole constructor. */
|
||||
public SortedSetDocValuesFacetField(String dim, String label) {
|
||||
public SortedSetDocValuesFacetField(String dim, String... path) {
|
||||
super("dummy", TYPE);
|
||||
FacetField.verifyLabel(label);
|
||||
for (String label : path) {
|
||||
FacetField.verifyLabel(label);
|
||||
}
|
||||
FacetField.verifyLabel(dim);
|
||||
if (path.length == 0) {
|
||||
throw new IllegalArgumentException("path must have at least one element");
|
||||
}
|
||||
this.dim = dim;
|
||||
this.label = label;
|
||||
this.path = path;
|
||||
this.label = FacetsConfig.pathToString(path);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "SortedSetDocValuesFacetField(dim=" + dim + " label=" + label + ")";
|
||||
return "SortedSetDocValuesFacetField(dim="
|
||||
+ dim
|
||||
+ " path="
|
||||
+ FacetsConfig.pathToString(path)
|
||||
+ ")";
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,10 +17,14 @@
|
|||
package org.apache.lucene.facet.sortedset;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.PrimitiveIterator;
|
||||
import org.apache.lucene.facet.FacetsConfig;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.SortedSetDocValues;
|
||||
import org.apache.lucene.util.Accountable;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
|
||||
/**
|
||||
* Wraps a {@link IndexReader} and resolves ords using existing {@link SortedSetDocValues} APIs
|
||||
|
@ -36,10 +40,7 @@ import org.apache.lucene.util.Accountable;
|
|||
*/
|
||||
public abstract class SortedSetDocValuesReaderState implements Accountable {
|
||||
|
||||
/**
|
||||
* Holds start/end range of ords, which maps to one dimension (someday we may generalize it to map
|
||||
* to hierarchies within one dimension).
|
||||
*/
|
||||
/** Holds start/end range of ords, which maps to one dimension. Only used for flat hierarchies. */
|
||||
public static final class OrdRange {
|
||||
/** Start of range, inclusive: */
|
||||
public final int start;
|
||||
|
@ -51,8 +52,111 @@ public abstract class SortedSetDocValuesReaderState implements Accountable {
|
|||
this.start = start;
|
||||
this.end = end;
|
||||
}
|
||||
|
||||
/** Iterates from start to end ord (inclusive) */
|
||||
public PrimitiveIterator.OfInt iterator() {
|
||||
return new PrimitiveIterator.OfInt() {
|
||||
int current = start;
|
||||
|
||||
@Override
|
||||
public int nextInt() {
|
||||
if (current > end) {
|
||||
return INVALID_ORDINAL;
|
||||
}
|
||||
return current++;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return current <= end;
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds children and sibling information for a single dimension. Only used with hierarchical
|
||||
* dimensions.
|
||||
*/
|
||||
public static final class DimTree {
|
||||
private final FixedBitSet hasChildren;
|
||||
// TODO: This array can take up a lot of space. Change type based on input size maybe?
|
||||
private final int[] siblings;
|
||||
|
||||
/** The first ord of the dimension */
|
||||
public final int dimStartOrd;
|
||||
|
||||
/** Sibling and children must be of same length */
|
||||
public DimTree(int dimStartOrd, List<Integer> sibling, List<Boolean> hasChildren) {
|
||||
if (sibling.size() != hasChildren.size()) {
|
||||
throw new IllegalArgumentException(
|
||||
"Sibling list and children list must have the same size. Got sibling list size of "
|
||||
+ sibling.size()
|
||||
+ " and child list size of "
|
||||
+ hasChildren.size());
|
||||
}
|
||||
this.hasChildren = new FixedBitSet(hasChildren.size());
|
||||
this.siblings = new int[sibling.size()];
|
||||
for (int i = 0; i < sibling.size(); i++) {
|
||||
if (hasChildren.get(i)) {
|
||||
assert i < sibling.size() - 1;
|
||||
this.hasChildren.set(i);
|
||||
}
|
||||
assert this.siblings[i] < sibling.size();
|
||||
this.siblings[i] = sibling.get(i);
|
||||
}
|
||||
this.dimStartOrd = dimStartOrd;
|
||||
}
|
||||
|
||||
/** Iterates through all first level children of dimension */
|
||||
public PrimitiveIterator.OfInt iterator() {
|
||||
return iterator(dimStartOrd);
|
||||
}
|
||||
|
||||
/** Iterates through all children of given pathOrd */
|
||||
public PrimitiveIterator.OfInt iterator(int pathOrd) {
|
||||
return new PrimitiveIterator.OfInt() {
|
||||
|
||||
boolean atStart = true;
|
||||
int currentOrd = pathOrd - dimStartOrd;
|
||||
|
||||
@Override
|
||||
public int nextInt() {
|
||||
if (atStart) {
|
||||
if (currentOrd < 0 || currentOrd >= hasChildren.length()) {
|
||||
return INVALID_ORDINAL;
|
||||
}
|
||||
atStart = false;
|
||||
if (hasChildren.get(currentOrd)) {
|
||||
currentOrd++;
|
||||
return currentOrd + dimStartOrd;
|
||||
} else {
|
||||
return INVALID_ORDINAL;
|
||||
}
|
||||
} else {
|
||||
currentOrd = siblings[currentOrd];
|
||||
return currentOrd + dimStartOrd;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
if (atStart) {
|
||||
if (currentOrd < 0 || currentOrd >= hasChildren.length()) {
|
||||
return false;
|
||||
}
|
||||
return hasChildren.get(currentOrd);
|
||||
} else {
|
||||
return siblings[currentOrd] != INVALID_ORDINAL;
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/** Invalid ordinal const */
|
||||
public static final int INVALID_ORDINAL = -1;
|
||||
|
||||
/** Sole constructor. */
|
||||
protected SortedSetDocValuesReaderState() {}
|
||||
|
||||
|
@ -62,15 +166,28 @@ public abstract class SortedSetDocValuesReaderState implements Accountable {
|
|||
/** Indexed field we are reading. */
|
||||
public abstract String getField();
|
||||
|
||||
/** Returns top-level index reader. */
|
||||
public abstract IndexReader getReader();
|
||||
|
||||
/** Number of unique labels. */
|
||||
public abstract int getSize();
|
||||
|
||||
/** Returns the associated facet config. */
|
||||
public abstract FacetsConfig getFacetsConfig();
|
||||
|
||||
/* Only used for flat facets (dim/value) */
|
||||
|
||||
/** Returns the {@link OrdRange} for this dimension. */
|
||||
public abstract OrdRange getOrdRange(String dim);
|
||||
|
||||
/** Returns mapping from prefix to {@link OrdRange}. */
|
||||
public abstract Map<String, OrdRange> getPrefixToOrdRange();
|
||||
|
||||
/** Returns top-level index reader. */
|
||||
public abstract IndexReader getReader();
|
||||
/* Only used for hierarchical facets */
|
||||
|
||||
/** Number of unique labels. */
|
||||
public abstract int getSize();
|
||||
/** Returns the {@link DimTree} for this dimension. */
|
||||
public abstract DimTree getDimTree(String dim);
|
||||
|
||||
/** Returns a list of all dimensions */
|
||||
public abstract Iterable<String> getDims();
|
||||
}
|
||||
|
|
|
@ -238,7 +238,7 @@ public abstract class FacetTestCase extends LuceneTestCase {
|
|||
} else if (b.value.doubleValue() > a.value.doubleValue()) {
|
||||
return 1;
|
||||
} else {
|
||||
return 0;
|
||||
return a.dim.compareTo(b.dim);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
|
|
@ -18,14 +18,19 @@ package org.apache.lucene.facet.sortedset;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.Stack;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.LinkedBlockingQueue;
|
||||
import java.util.concurrent.ThreadPoolExecutor;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.stream.Collectors;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.StringField;
|
||||
|
@ -47,6 +52,7 @@ import org.apache.lucene.tests.index.RandomIndexWriter;
|
|||
import org.apache.lucene.tests.util.LuceneTestCase;
|
||||
import org.apache.lucene.tests.util.TestUtil;
|
||||
import org.apache.lucene.util.Accountable;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.NamedThreadFactory;
|
||||
|
||||
public class TestSortedSetDocValuesFacets extends FacetTestCase {
|
||||
|
@ -85,6 +91,7 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase {
|
|||
try {
|
||||
Facets facets = getAllFacets(searcher, state, exec);
|
||||
|
||||
// value should ideally be 2 but SSDV facets are bugged here
|
||||
assertEquals(
|
||||
"dim=a path=[] value=4 childCount=3\n foo (2)\n bar (1)\n zoo (1)\n",
|
||||
facets.getTopChildren(10, "a").toString());
|
||||
|
@ -105,6 +112,84 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
public void testBasicHierarchical() throws Exception {
|
||||
FacetsConfig config = new FacetsConfig();
|
||||
config.setMultiValued("a", true);
|
||||
config.setMultiValued("c", true);
|
||||
config.setHierarchical("c", true);
|
||||
try (Directory dir = newDirectory();
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random(), dir)) {
|
||||
Document doc = new Document();
|
||||
doc.add(new SortedSetDocValuesFacetField("a", "foo"));
|
||||
doc.add(new SortedSetDocValuesFacetField("a", "bar"));
|
||||
doc.add(new SortedSetDocValuesFacetField("a", "zoo"));
|
||||
doc.add(new SortedSetDocValuesFacetField("b", "baz"));
|
||||
doc.add(new SortedSetDocValuesFacetField("c", "buzz"));
|
||||
doc.add(new SortedSetDocValuesFacetField("c", "buzz", "bee"));
|
||||
doc.add(new SortedSetDocValuesFacetField("c", "buzz", "bif"));
|
||||
doc.add(new SortedSetDocValuesFacetField("c", "buzz", "bif", "baf"));
|
||||
doc.add(new SortedSetDocValuesFacetField("c", "buzz", "biz"));
|
||||
doc.add(new SortedSetDocValuesFacetField("c", "buzz", "biz", "bar"));
|
||||
writer.addDocument(config.build(doc));
|
||||
if (random().nextBoolean()) {
|
||||
writer.commit();
|
||||
}
|
||||
|
||||
doc = new Document();
|
||||
doc.add(new SortedSetDocValuesFacetField("a", "foo"));
|
||||
doc.add(new SortedSetDocValuesFacetField("c", "buzz", "bif", "baf"));
|
||||
writer.addDocument(config.build(doc));
|
||||
|
||||
// NRT open
|
||||
try (IndexReader r = writer.getReader()) {
|
||||
IndexSearcher searcher = newSearcher(r);
|
||||
|
||||
// Per-top-reader state:
|
||||
SortedSetDocValuesReaderState state =
|
||||
new DefaultSortedSetDocValuesReaderState(searcher.getIndexReader(), config);
|
||||
|
||||
ExecutorService exec = randomExecutorServiceOrNull();
|
||||
try {
|
||||
Facets facets = getAllFacets(searcher, state, exec);
|
||||
|
||||
// since a is not set to be hierarchical, it's value count will be bugged as ancestral
|
||||
// paths are not indexed
|
||||
assertEquals(
|
||||
"dim=a path=[] value=4 childCount=3\n foo (2)\n bar (1)\n zoo (1)\n",
|
||||
facets.getTopChildren(10, "a").toString());
|
||||
assertEquals(
|
||||
"dim=b path=[] value=1 childCount=1\n baz (1)\n",
|
||||
facets.getTopChildren(10, "b").toString());
|
||||
assertEquals(
|
||||
"dim=c path=[buzz] value=2 childCount=3\n bif (2)\n bee (1)\n biz (1)\n",
|
||||
facets.getTopChildren(10, "c", "buzz").toString());
|
||||
assertEquals(
|
||||
"dim=c path=[buzz, bif] value=2 childCount=1\n baf (2)\n",
|
||||
facets.getTopChildren(10, "c", "buzz", "bif").toString());
|
||||
|
||||
// DrillDown:
|
||||
DrillDownQuery q = new DrillDownQuery(config);
|
||||
q.add("a", "foo");
|
||||
q.add("b", "baz");
|
||||
TopDocs hits = searcher.search(q, 1);
|
||||
assertEquals(1, hits.totalHits.value);
|
||||
|
||||
q = new DrillDownQuery(config);
|
||||
q.add("c", "buzz", "bif");
|
||||
hits = searcher.search(q, 2);
|
||||
assertEquals(2, hits.totalHits.value);
|
||||
|
||||
q = new DrillDownQuery(config);
|
||||
q.add("c", "buzz", "biz", "bar");
|
||||
hits = searcher.search(q, 2);
|
||||
assertEquals(1, hits.totalHits.value);
|
||||
} finally {
|
||||
if (exec != null) exec.shutdownNow();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// See: LUCENE-10070
|
||||
public void testCountAll() throws Exception {
|
||||
try (Directory dir = newDirectory();
|
||||
|
@ -158,6 +243,67 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
public void testHierarchicalCountAll() throws Exception {
|
||||
try (Directory dir = newDirectory();
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random(), dir)) {
|
||||
FacetsConfig config = new FacetsConfig();
|
||||
config.setHierarchical("b", true);
|
||||
|
||||
Document doc = new Document();
|
||||
doc.add(new StringField("id", "0", Field.Store.NO));
|
||||
doc.add(new SortedSetDocValuesFacetField("a", "foo"));
|
||||
doc.add(new SortedSetDocValuesFacetField("b", "buzz", "bee"));
|
||||
writer.addDocument(config.build(doc));
|
||||
|
||||
doc = new Document();
|
||||
doc.add(new StringField("id", "1", Field.Store.NO));
|
||||
doc.add(new SortedSetDocValuesFacetField("a", "bar"));
|
||||
doc.add(new SortedSetDocValuesFacetField("b", "buzz", "baz"));
|
||||
writer.addDocument(config.build(doc));
|
||||
|
||||
writer.deleteDocuments(new Term("id", "0"));
|
||||
|
||||
// NRT open
|
||||
try (IndexReader r = writer.getReader()) {
|
||||
IndexSearcher searcher = newSearcher(r);
|
||||
|
||||
// Per-top-reader state:
|
||||
SortedSetDocValuesReaderState state =
|
||||
new DefaultSortedSetDocValuesReaderState(searcher.getIndexReader(), config);
|
||||
|
||||
Facets facets = new SortedSetDocValuesFacetCounts(state);
|
||||
|
||||
assertEquals(
|
||||
"dim=a path=[] value=1 childCount=1\n bar (1)\n",
|
||||
facets.getTopChildren(10, "a").toString());
|
||||
assertEquals(
|
||||
"dim=b path=[buzz] value=1 childCount=1\n baz (1)\n",
|
||||
facets.getTopChildren(10, "b", "buzz").toString());
|
||||
|
||||
ExecutorService exec =
|
||||
new ThreadPoolExecutor(
|
||||
1,
|
||||
TestUtil.nextInt(random(), 2, 6),
|
||||
Long.MAX_VALUE,
|
||||
TimeUnit.MILLISECONDS,
|
||||
new LinkedBlockingQueue<Runnable>(),
|
||||
new NamedThreadFactory("TestIndexSearcher"));
|
||||
try {
|
||||
facets = new ConcurrentSortedSetDocValuesFacetCounts(state, exec);
|
||||
|
||||
assertEquals(
|
||||
"dim=a path=[] value=1 childCount=1\n bar (1)\n",
|
||||
facets.getTopChildren(10, "a").toString());
|
||||
assertEquals(
|
||||
"dim=b path=[buzz] value=1 childCount=1\n baz (1)\n",
|
||||
facets.getTopChildren(10, "b", "buzz").toString());
|
||||
} finally {
|
||||
exec.shutdownNow();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void testBasicSingleValued() throws Exception {
|
||||
FacetsConfig config = new FacetsConfig();
|
||||
config.setMultiValued("a", false);
|
||||
|
@ -210,6 +356,57 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
public void testHierarchicalBasicSingleValues() throws Exception {
|
||||
FacetsConfig config = new FacetsConfig();
|
||||
config.setHierarchical("c", true);
|
||||
try (Directory dir = newDirectory();
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random(), dir)) {
|
||||
Document doc = new Document();
|
||||
doc.add(new SortedSetDocValuesFacetField("c", "buzz", "bar"));
|
||||
writer.addDocument(config.build(doc));
|
||||
doc = new Document();
|
||||
doc.add(new SortedSetDocValuesFacetField("c", "buzz", "baz"));
|
||||
writer.addDocument(config.build(doc));
|
||||
if (random().nextBoolean()) {
|
||||
writer.commit();
|
||||
}
|
||||
|
||||
doc = new Document();
|
||||
doc.add(new SortedSetDocValuesFacetField("c", "baz"));
|
||||
writer.addDocument(config.build(doc));
|
||||
|
||||
// NRT open
|
||||
try (IndexReader r = writer.getReader()) {
|
||||
IndexSearcher searcher = newSearcher(r);
|
||||
|
||||
// Per-top-reader state:
|
||||
SortedSetDocValuesReaderState state =
|
||||
new DefaultSortedSetDocValuesReaderState(searcher.getIndexReader(), config);
|
||||
|
||||
ExecutorService exec = randomExecutorServiceOrNull();
|
||||
try {
|
||||
Facets facets = getAllFacets(searcher, state, exec);
|
||||
|
||||
assertEquals(
|
||||
"dim=c path=[buzz] value=2 childCount=2\n bar (1)\n baz (1)\n",
|
||||
facets.getTopChildren(10, "c", "buzz").toString());
|
||||
|
||||
DrillDownQuery q = new DrillDownQuery(config);
|
||||
q.add("c", "buzz");
|
||||
TopDocs hits = searcher.search(q, 1);
|
||||
assertEquals(2, hits.totalHits.value);
|
||||
|
||||
q = new DrillDownQuery(config);
|
||||
q.add("c", "buzz", "bar");
|
||||
hits = searcher.search(q, 1);
|
||||
assertEquals(1, hits.totalHits.value);
|
||||
} finally {
|
||||
if (exec != null) exec.shutdownNow();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void testDrillDownOptions() throws Exception {
|
||||
FacetsConfig config = new FacetsConfig();
|
||||
config.setDrillDownTermsIndexing("c", FacetsConfig.DrillDownTermsIndexing.NONE);
|
||||
|
@ -293,6 +490,144 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
public void testHierarchicalDrillDownOptions() throws Exception {
|
||||
FacetsConfig config = new FacetsConfig();
|
||||
config.setDrillDownTermsIndexing("c", FacetsConfig.DrillDownTermsIndexing.NONE);
|
||||
config.setDrillDownTermsIndexing(
|
||||
"d", FacetsConfig.DrillDownTermsIndexing.DIMENSION_AND_FULL_PATH);
|
||||
config.setDrillDownTermsIndexing("e", FacetsConfig.DrillDownTermsIndexing.ALL_PATHS_NO_DIM);
|
||||
config.setDrillDownTermsIndexing("f", FacetsConfig.DrillDownTermsIndexing.FULL_PATH_ONLY);
|
||||
config.setDrillDownTermsIndexing("g", FacetsConfig.DrillDownTermsIndexing.ALL);
|
||||
config.setHierarchical("c", true);
|
||||
config.setHierarchical("d", true);
|
||||
config.setHierarchical("e", true);
|
||||
config.setHierarchical("f", true);
|
||||
config.setHierarchical("g", true);
|
||||
try (Directory dir = newDirectory();
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random(), dir)) {
|
||||
|
||||
Document doc = new Document();
|
||||
doc.add(new SortedSetDocValuesFacetField("c", "biz", "baz"));
|
||||
doc.add(new SortedSetDocValuesFacetField("d", "biz", "baz"));
|
||||
doc.add(new SortedSetDocValuesFacetField("e", "biz", "baz"));
|
||||
doc.add(new SortedSetDocValuesFacetField("f", "biz", "baz"));
|
||||
doc.add(new SortedSetDocValuesFacetField("g", "biz", "baz"));
|
||||
writer.addDocument(config.build(doc));
|
||||
if (random().nextBoolean()) {
|
||||
writer.commit();
|
||||
}
|
||||
|
||||
doc = new Document();
|
||||
doc.add(new SortedSetDocValuesFacetField("a", "foo"));
|
||||
writer.addDocument(config.build(doc));
|
||||
|
||||
// NRT open
|
||||
try (IndexReader r = writer.getReader()) {
|
||||
IndexSearcher searcher = newSearcher(r);
|
||||
// Drill down with different indexing configuration options
|
||||
DrillDownQuery q = new DrillDownQuery(config);
|
||||
q.add("c");
|
||||
TopDocs hits = searcher.search(q, 1);
|
||||
assertEquals(0, hits.totalHits.value);
|
||||
|
||||
q = new DrillDownQuery(config);
|
||||
q.add("c", "biz");
|
||||
hits = searcher.search(q, 1);
|
||||
assertEquals(0, hits.totalHits.value);
|
||||
|
||||
q = new DrillDownQuery(config);
|
||||
q.add("c", "biz", "baz");
|
||||
hits = searcher.search(q, 1);
|
||||
assertEquals(0, hits.totalHits.value);
|
||||
|
||||
q = new DrillDownQuery(config);
|
||||
q.add("c", "foo");
|
||||
hits = searcher.search(q, 1);
|
||||
assertEquals(0, hits.totalHits.value);
|
||||
|
||||
q = new DrillDownQuery(config);
|
||||
q.add("d");
|
||||
hits = searcher.search(q, 1);
|
||||
assertEquals(1, hits.totalHits.value);
|
||||
|
||||
q = new DrillDownQuery(config);
|
||||
q.add("d", "foo");
|
||||
hits = searcher.search(q, 1);
|
||||
assertEquals(0, hits.totalHits.value);
|
||||
|
||||
q = new DrillDownQuery(config);
|
||||
q.add("d", "biz");
|
||||
hits = searcher.search(q, 1);
|
||||
assertEquals(0, hits.totalHits.value);
|
||||
|
||||
q = new DrillDownQuery(config);
|
||||
q.add("d", "biz", "baz");
|
||||
hits = searcher.search(q, 1);
|
||||
assertEquals(1, hits.totalHits.value);
|
||||
|
||||
q = new DrillDownQuery(config);
|
||||
q.add("e");
|
||||
hits = searcher.search(q, 1);
|
||||
assertEquals(0, hits.totalHits.value);
|
||||
|
||||
q = new DrillDownQuery(config);
|
||||
q.add("e", "foo");
|
||||
hits = searcher.search(q, 1);
|
||||
assertEquals(0, hits.totalHits.value);
|
||||
|
||||
q = new DrillDownQuery(config);
|
||||
q.add("e", "biz");
|
||||
hits = searcher.search(q, 1);
|
||||
assertEquals(1, hits.totalHits.value);
|
||||
|
||||
q = new DrillDownQuery(config);
|
||||
q.add("e", "biz", "baz");
|
||||
hits = searcher.search(q, 1);
|
||||
assertEquals(1, hits.totalHits.value);
|
||||
|
||||
q = new DrillDownQuery(config);
|
||||
q.add("f");
|
||||
hits = searcher.search(q, 1);
|
||||
assertEquals(0, hits.totalHits.value);
|
||||
|
||||
q = new DrillDownQuery(config);
|
||||
q.add("f", "foo");
|
||||
hits = searcher.search(q, 1);
|
||||
assertEquals(0, hits.totalHits.value);
|
||||
|
||||
q = new DrillDownQuery(config);
|
||||
q.add("f", "biz");
|
||||
hits = searcher.search(q, 1);
|
||||
assertEquals(0, hits.totalHits.value);
|
||||
|
||||
q = new DrillDownQuery(config);
|
||||
q.add("f", "biz", "baz");
|
||||
hits = searcher.search(q, 1);
|
||||
assertEquals(1, hits.totalHits.value);
|
||||
|
||||
q = new DrillDownQuery(config);
|
||||
q.add("g");
|
||||
hits = searcher.search(q, 1);
|
||||
assertEquals(1, hits.totalHits.value);
|
||||
|
||||
q = new DrillDownQuery(config);
|
||||
q.add("g", "foo");
|
||||
hits = searcher.search(q, 1);
|
||||
assertEquals(0, hits.totalHits.value);
|
||||
|
||||
q = new DrillDownQuery(config);
|
||||
q.add("g", "biz");
|
||||
hits = searcher.search(q, 1);
|
||||
assertEquals(1, hits.totalHits.value);
|
||||
|
||||
q = new DrillDownQuery(config);
|
||||
q.add("g", "biz", "baz");
|
||||
hits = searcher.search(q, 1);
|
||||
assertEquals(1, hits.totalHits.value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// LUCENE-5090
|
||||
@SuppressWarnings("unused")
|
||||
public void testStaleState() throws Exception {
|
||||
|
@ -405,6 +740,72 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
public void testHierarchicalSparseFacets() throws Exception {
|
||||
try (Directory dir = newDirectory();
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random(), dir)) {
|
||||
|
||||
FacetsConfig config = new FacetsConfig();
|
||||
config.setHierarchical("d", true);
|
||||
config.setHierarchical("e", true);
|
||||
|
||||
Document doc = new Document();
|
||||
doc.add(new SortedSetDocValuesFacetField("d", "foo", "bar"));
|
||||
writer.addDocument(config.build(doc));
|
||||
|
||||
if (random().nextBoolean()) {
|
||||
writer.commit();
|
||||
}
|
||||
|
||||
doc = new Document();
|
||||
doc.add(new SortedSetDocValuesFacetField("d", "foo", "baz"));
|
||||
writer.addDocument(config.build(doc));
|
||||
|
||||
if (random().nextBoolean()) {
|
||||
writer.commit();
|
||||
}
|
||||
|
||||
doc = new Document();
|
||||
doc.add(new SortedSetDocValuesFacetField("e", "biz", "baz"));
|
||||
writer.addDocument(config.build(doc));
|
||||
|
||||
// NRT open
|
||||
try (IndexReader r = writer.getReader()) {
|
||||
IndexSearcher searcher = newSearcher(r);
|
||||
|
||||
// Per-top-reader state:
|
||||
SortedSetDocValuesReaderState state =
|
||||
new DefaultSortedSetDocValuesReaderState(searcher.getIndexReader(), config);
|
||||
|
||||
ExecutorService exec = randomExecutorServiceOrNull();
|
||||
try {
|
||||
Facets facets = getAllFacets(searcher, state, exec);
|
||||
|
||||
// Ask for top 10 labels for any dims that have counts:
|
||||
List<FacetResult> results = facets.getAllDims(10);
|
||||
|
||||
assertEquals(2, results.size());
|
||||
assertEquals(
|
||||
"dim=d path=[] value=2 childCount=1\n foo (2)\n", results.get(0).toString());
|
||||
assertEquals(
|
||||
"dim=e path=[] value=1 childCount=1\n biz (1)\n", results.get(1).toString());
|
||||
|
||||
Collection<Accountable> resources = state.getChildResources();
|
||||
assertTrue(state.toString().contains(FacetsConfig.DEFAULT_INDEX_FIELD_NAME));
|
||||
if (searcher.getIndexReader().leaves().size() > 1) {
|
||||
assertTrue(state.ramBytesUsed() > 0);
|
||||
assertFalse(resources.isEmpty());
|
||||
assertTrue(resources.toString().contains(FacetsConfig.DEFAULT_INDEX_FIELD_NAME));
|
||||
} else {
|
||||
assertEquals(0, state.ramBytesUsed());
|
||||
assertTrue(resources.isEmpty());
|
||||
}
|
||||
} finally {
|
||||
if (exec != null) exec.shutdownNow();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void testSomeSegmentsMissing() throws Exception {
|
||||
try (Directory dir = newDirectory();
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random(), dir)) {
|
||||
|
@ -448,6 +849,58 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
public void testHierarchicalSomeSegmentsMissing() throws Exception {
|
||||
try (Directory dir = newDirectory();
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random(), dir)) {
|
||||
|
||||
FacetsConfig config = new FacetsConfig();
|
||||
config.setHierarchical("b", true);
|
||||
|
||||
Document doc = new Document();
|
||||
doc.add(new SortedSetDocValuesFacetField("a", "foo1"));
|
||||
doc.add(new SortedSetDocValuesFacetField("b", "foo", "bar"));
|
||||
writer.addDocument(config.build(doc));
|
||||
writer.commit();
|
||||
|
||||
doc = new Document();
|
||||
writer.addDocument(config.build(doc));
|
||||
writer.commit();
|
||||
|
||||
doc = new Document();
|
||||
doc.add(new SortedSetDocValuesFacetField("a", "foo2"));
|
||||
doc.add(new SortedSetDocValuesFacetField("b", "foo", "buzz"));
|
||||
writer.addDocument(config.build(doc));
|
||||
writer.commit();
|
||||
|
||||
// NRT open
|
||||
try (IndexReader r = writer.getReader()) {
|
||||
IndexSearcher searcher = newSearcher(r);
|
||||
|
||||
// Per-top-reader state:
|
||||
SortedSetDocValuesReaderState state =
|
||||
new DefaultSortedSetDocValuesReaderState(searcher.getIndexReader(), config);
|
||||
|
||||
ExecutorService exec = randomExecutorServiceOrNull();
|
||||
try {
|
||||
Facets facets = getAllFacets(searcher, state, exec);
|
||||
|
||||
// Ask for top 10 labels for any dims that have counts:
|
||||
assertEquals(
|
||||
"dim=a path=[] value=2 childCount=2\n foo1 (1)\n foo2 (1)\n",
|
||||
facets.getTopChildren(10, "a").toString());
|
||||
assertEquals(
|
||||
"dim=b path=[] value=2 childCount=1\n foo (2)\n",
|
||||
facets.getTopChildren(10, "b").toString());
|
||||
assertEquals(
|
||||
"dim=b path=[foo] value=2 childCount=2\n bar (1)\n buzz (1)\n",
|
||||
facets.getTopChildren(10, "b", "foo").toString());
|
||||
} finally {
|
||||
if (exec != null) exec.shutdownNow();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void testRandom() throws Exception {
|
||||
int fullIterations = LuceneTestCase.TEST_NIGHTLY ? 20 : 3;
|
||||
for (int fullIter = 0; fullIter < fullIterations; fullIter++) {
|
||||
|
@ -562,6 +1015,274 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
public void testRandomHierarchicalFlatMix() throws Exception {
|
||||
int fullIterations = LuceneTestCase.TEST_NIGHTLY ? 20 : 3;
|
||||
for (int fullIter = 0; fullIter < fullIterations; fullIter++) {
|
||||
String[] tokens = getRandomTokens(10);
|
||||
|
||||
try (Directory indexDir = newDirectory();
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), indexDir)) {
|
||||
FacetsConfig config = new FacetsConfig();
|
||||
int numDocs = atLeast(1000);
|
||||
// Most of the time allow up to 7 dims per doc, but occasionally limit all docs to a single
|
||||
// dim:
|
||||
int numDims;
|
||||
if (random().nextInt(10) < 8) {
|
||||
numDims = TestUtil.nextInt(random(), 1, 7);
|
||||
} else {
|
||||
numDims = 1;
|
||||
}
|
||||
boolean[] hierarchicalDims = new boolean[numDims];
|
||||
for (int i = 0; i < numDims; i++) {
|
||||
boolean isHierarchicalDim = random().nextBoolean();
|
||||
config.setHierarchical("dim" + i, isHierarchicalDim);
|
||||
hierarchicalDims[i] = isHierarchicalDim;
|
||||
}
|
||||
List<TestDoc> testDocs = getRandomDocs(tokens, numDocs, numDims);
|
||||
List<Set<SortedSetDocValuesFacetField>> testDocFacets = new ArrayList<>();
|
||||
for (TestDoc testDoc : testDocs) {
|
||||
Document doc = new Document();
|
||||
Set<SortedSetDocValuesFacetField> docFacets = new HashSet<>();
|
||||
doc.add(newStringField("content", testDoc.content, Field.Store.NO));
|
||||
for (int i = 0; i < numDims; i++) {
|
||||
if (hierarchicalDims[i]) {
|
||||
int pathLength;
|
||||
if (numDims == 1) {
|
||||
pathLength = 1;
|
||||
} else {
|
||||
pathLength = random().nextInt(numDims - 1) + 1;
|
||||
}
|
||||
List<String> path = new ArrayList<>();
|
||||
for (int j = 0; j < pathLength; j++) {
|
||||
if (testDoc.dims[j] != null) {
|
||||
path.add(testDoc.dims[j]);
|
||||
}
|
||||
}
|
||||
doc.add(new SortedSetDocValuesFacetField("dim" + i, path.toArray(String[]::new)));
|
||||
for (int j = 0; j < path.size(); j++) {
|
||||
docFacets.add(
|
||||
new SortedSetDocValuesFacetField(
|
||||
"dim" + i, path.subList(0, j + 1).toArray(String[]::new)));
|
||||
}
|
||||
} else if (testDoc.dims[i] != null) {
|
||||
doc.add(new SortedSetDocValuesFacetField("dim" + i, testDoc.dims[i]));
|
||||
docFacets.add(new SortedSetDocValuesFacetField("dim" + i, testDoc.dims[i]));
|
||||
}
|
||||
}
|
||||
testDocFacets.add(docFacets);
|
||||
w.addDocument(config.build(doc));
|
||||
}
|
||||
|
||||
// NRT open
|
||||
try (IndexReader r = w.getReader()) {
|
||||
IndexSearcher searcher = newSearcher(r);
|
||||
|
||||
// Per-top-reader state:
|
||||
SortedSetDocValuesReaderState state =
|
||||
new DefaultSortedSetDocValuesReaderState(searcher.getIndexReader(), config);
|
||||
ExecutorService exec = randomExecutorServiceOrNull();
|
||||
try {
|
||||
int iters = atLeast(100);
|
||||
for (int iter = 0; iter < iters; iter++) {
|
||||
String searchToken = tokens[random().nextInt(tokens.length)];
|
||||
if (VERBOSE) {
|
||||
System.out.println("\nTEST: iter content=" + searchToken);
|
||||
}
|
||||
FacetsCollector fc = new FacetsCollector();
|
||||
FacetsCollector.search(
|
||||
searcher, new TermQuery(new Term("content", searchToken)), 10, fc);
|
||||
Facets facets;
|
||||
if (exec != null) {
|
||||
facets = new ConcurrentSortedSetDocValuesFacetCounts(state, fc, exec);
|
||||
} else {
|
||||
facets = new SortedSetDocValuesFacetCounts(state, fc);
|
||||
}
|
||||
// Slow, yet hopefully bug-free, faceting:
|
||||
Map<String, FacetResult> expectedResults = new HashMap<>();
|
||||
|
||||
for (int i = 0; i < testDocs.size(); i++) {
|
||||
TestDoc doc = testDocs.get(i);
|
||||
if (doc.content.equals(searchToken)) {
|
||||
// goes through all facets paths in the doc
|
||||
for (SortedSetDocValuesFacetField facetField : testDocFacets.get(i)) {
|
||||
String[] path = facetField.path;
|
||||
String parentDimPathString;
|
||||
if (path.length == 1) {
|
||||
parentDimPathString = facetField.dim;
|
||||
} else {
|
||||
parentDimPathString =
|
||||
facetField.dim
|
||||
+ FacetsConfig.DELIM_CHAR
|
||||
+ FacetsConfig.pathToString(path, path.length - 1);
|
||||
}
|
||||
FacetResult result = expectedResults.get(parentDimPathString);
|
||||
if (result == null) {
|
||||
String[] resultPath = new String[path.length - 1];
|
||||
System.arraycopy(path, 0, resultPath, 0, resultPath.length);
|
||||
result =
|
||||
new FacetResult(facetField.dim, resultPath, 0, new LabelAndValue[0], 0);
|
||||
}
|
||||
String child = path[path.length - 1];
|
||||
LabelAndValue[] labelAndValues = result.labelValues;
|
||||
boolean containsChild = false;
|
||||
for (int k = 0; k < labelAndValues.length; k++) {
|
||||
if (labelAndValues[k].label.equals(child)) {
|
||||
containsChild = true;
|
||||
labelAndValues[k] =
|
||||
new LabelAndValue(
|
||||
labelAndValues[k].label, labelAndValues[k].value.intValue() + 1);
|
||||
break;
|
||||
}
|
||||
}
|
||||
LabelAndValue[] newLabelAndValues;
|
||||
int childCount = result.childCount;
|
||||
if (containsChild == false) {
|
||||
newLabelAndValues = new LabelAndValue[labelAndValues.length + 1];
|
||||
System.arraycopy(
|
||||
labelAndValues, 0, newLabelAndValues, 0, labelAndValues.length);
|
||||
newLabelAndValues[newLabelAndValues.length - 1] = new LabelAndValue(child, 1);
|
||||
childCount++;
|
||||
} else {
|
||||
newLabelAndValues = labelAndValues;
|
||||
}
|
||||
newLabelAndValues =
|
||||
Arrays.stream(newLabelAndValues)
|
||||
.sorted(
|
||||
(o1, o2) -> {
|
||||
if (o1.value.equals(o2.value)) {
|
||||
return new BytesRef(o1.label).compareTo(new BytesRef(o2.label));
|
||||
} else {
|
||||
return o2.value.intValue() - o1.value.intValue();
|
||||
}
|
||||
})
|
||||
.collect(Collectors.toList())
|
||||
.toArray(LabelAndValue[]::new);
|
||||
FacetResult newResult =
|
||||
new FacetResult(result.dim, result.path, 0, newLabelAndValues, childCount);
|
||||
expectedResults.put(parentDimPathString, newResult);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// second pass to update values
|
||||
for (int i = 0; i < testDocs.size(); i++) {
|
||||
TestDoc doc = testDocs.get(i);
|
||||
if (doc.content.equals(searchToken)) {
|
||||
Set<String> dimsCounted = new HashSet<>();
|
||||
for (SortedSetDocValuesFacetField facetField : testDocFacets.get(i)) {
|
||||
String dimPathString =
|
||||
FacetsConfig.pathToString(facetField.dim, facetField.path);
|
||||
FacetResult result = expectedResults.get(dimPathString);
|
||||
FacetResult dimResult = expectedResults.get(facetField.dim);
|
||||
if (result != null) {
|
||||
expectedResults.put(
|
||||
dimPathString,
|
||||
new FacetResult(
|
||||
result.dim,
|
||||
result.path,
|
||||
result.value.intValue() + 1,
|
||||
result.labelValues,
|
||||
result.childCount));
|
||||
}
|
||||
if (dimResult != null && dimsCounted.add(facetField.dim)) {
|
||||
expectedResults.put(
|
||||
facetField.dim,
|
||||
new FacetResult(
|
||||
dimResult.dim,
|
||||
dimResult.path,
|
||||
dimResult.value.intValue() + 1,
|
||||
dimResult.labelValues,
|
||||
dimResult.childCount));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
List<FacetResult> expected = new ArrayList<>(expectedResults.values());
|
||||
|
||||
List<FacetResult> expectedAllDims = new ArrayList<>();
|
||||
for (FacetResult result : expected) {
|
||||
if (result.path.length == 0) {
|
||||
expectedAllDims.add(result);
|
||||
if (expectedAllDims.size() >= 10) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
sortFacetResults(expectedAllDims);
|
||||
|
||||
List<FacetResult> actualAllDims = facets.getAllDims(10);
|
||||
|
||||
assertEquals(expectedAllDims, actualAllDims);
|
||||
|
||||
// Dfs through top children
|
||||
for (FacetResult dimResult : actualAllDims) {
|
||||
if (config.getDimConfig(dimResult.dim).hierarchical) {
|
||||
Stack<String[]> stack = new Stack<>();
|
||||
for (LabelAndValue labelAndValue : dimResult.labelValues) {
|
||||
String[] path = new String[1];
|
||||
path[0] = labelAndValue.label;
|
||||
stack.add(path);
|
||||
}
|
||||
while (stack.empty() == false) {
|
||||
String[] currPath = stack.pop();
|
||||
FacetResult expectedResult =
|
||||
getFacetResultForPath(expected, dimResult.dim, currPath);
|
||||
FacetResult actualResult = facets.getTopChildren(10, dimResult.dim, currPath);
|
||||
try {
|
||||
assertEquals(expectedResult, actualResult);
|
||||
} catch (AssertionError e) {
|
||||
System.out.println(iter);
|
||||
System.out.println(config.getDimConfig(dimResult.dim).hierarchical);
|
||||
throw e;
|
||||
}
|
||||
if (actualResult != null) {
|
||||
for (LabelAndValue labelAndValue : actualResult.labelValues) {
|
||||
String[] path = new String[currPath.length + 1];
|
||||
System.arraycopy(currPath, 0, path, 0, currPath.length);
|
||||
path[path.length - 1] = labelAndValue.label;
|
||||
stack.add(path);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
if (exec != null) exec.shutdownNow();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static FacetResult getFacetResultForPath(
|
||||
List<FacetResult> allPaths, String dim, String[] path) {
|
||||
for (FacetResult result : allPaths) {
|
||||
if (path.length == 0) {
|
||||
if (result.path.length == 0 && result.dim.equals(dim)) {
|
||||
return result;
|
||||
}
|
||||
} else {
|
||||
boolean isEqualPath = true;
|
||||
if (path.length != result.path.length) {
|
||||
isEqualPath = false;
|
||||
} else {
|
||||
for (int i = 0; i < path.length; i++) {
|
||||
if (path[i].equals(result.path[i]) == false) {
|
||||
isEqualPath = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (isEqualPath && result.dim.equals(dim)) {
|
||||
return result;
|
||||
}
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public void testNonExistentDimension() throws Exception {
|
||||
try (Directory dir = newDirectory();
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random(), dir)) {
|
||||
|
@ -592,6 +1313,75 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
public void testHierarchicalNonExistentDimension() throws Exception {
|
||||
try (Directory dir = newDirectory();
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random(), dir)) {
|
||||
FacetsConfig config = new FacetsConfig();
|
||||
config.setHierarchical("fizz", true);
|
||||
|
||||
Document doc = new Document();
|
||||
doc.add(new SortedSetDocValuesFacetField("foo", "bar"));
|
||||
doc.add(new SortedSetDocValuesFacetField("fizz", "buzz", "baz"));
|
||||
writer.addDocument(config.build(doc));
|
||||
writer.commit();
|
||||
|
||||
try (IndexReader r = writer.getReader()) {
|
||||
IndexSearcher searcher = newSearcher(r);
|
||||
|
||||
SortedSetDocValuesReaderState state =
|
||||
new DefaultSortedSetDocValuesReaderState(searcher.getIndexReader(), config);
|
||||
|
||||
ExecutorService exec = randomExecutorServiceOrNull();
|
||||
try {
|
||||
Facets facets = getAllFacets(searcher, state, exec);
|
||||
FacetResult result = facets.getTopChildren(5, "non-existent dimension");
|
||||
|
||||
// make sure the result is null (and no exception was thrown)
|
||||
assertNull(result);
|
||||
|
||||
expectThrows(
|
||||
IllegalArgumentException.class,
|
||||
() -> {
|
||||
facets.getTopChildren(5, "non-existent dimension", "with a path");
|
||||
});
|
||||
} finally {
|
||||
if (exec != null) exec.shutdownNow();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void testHierarchicalNonExistentPath() throws Exception {
|
||||
try (Directory dir = newDirectory();
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random(), dir)) {
|
||||
FacetsConfig config = new FacetsConfig();
|
||||
config.setHierarchical("fizz", true);
|
||||
|
||||
Document doc = new Document();
|
||||
doc.add(new SortedSetDocValuesFacetField("fizz", "buzz", "baz"));
|
||||
writer.addDocument(config.build(doc));
|
||||
writer.commit();
|
||||
|
||||
try (IndexReader r = writer.getReader()) {
|
||||
IndexSearcher searcher = newSearcher(r);
|
||||
|
||||
SortedSetDocValuesReaderState state =
|
||||
new DefaultSortedSetDocValuesReaderState(searcher.getIndexReader(), config);
|
||||
|
||||
ExecutorService exec = randomExecutorServiceOrNull();
|
||||
try {
|
||||
Facets facets = getAllFacets(searcher, state, exec);
|
||||
FacetResult result = facets.getTopChildren(5, "fizz", "fake", "path");
|
||||
|
||||
// make sure the result is null (and no exception was thrown)
|
||||
assertNull(result);
|
||||
} finally {
|
||||
if (exec != null) exec.shutdownNow();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static Facets getAllFacets(
|
||||
IndexSearcher searcher, SortedSetDocValuesReaderState state, ExecutorService exec)
|
||||
throws IOException, InterruptedException {
|
||||
|
|
|
@ -237,11 +237,6 @@ public class TestFacetLabel extends FacetTestCase {
|
|||
() -> {
|
||||
new SortedSetDocValuesFacetField("", "abc");
|
||||
});
|
||||
expectThrows(
|
||||
IllegalArgumentException.class,
|
||||
() -> {
|
||||
new SortedSetDocValuesFacetField("dim", null);
|
||||
});
|
||||
expectThrows(
|
||||
IllegalArgumentException.class,
|
||||
() -> {
|
||||
|
|
Loading…
Reference in New Issue