mirror of
https://github.com/apache/lucene.git
synced 2025-03-06 16:29:30 +00:00
Reduce duplication in taxonomy facets; always do counts (#12966)
This is a large change, refactoring most of the taxonomy facets code and changing internal behaviour, without changing the API. There are specific API changes this sets us up to do later, e.g. retrieving counts from aggregation facets. 1. Move most of the responsibility from TaxonomyFacets implementations to TaxonomyFacets itself. This reduces code duplication and enables future development. Addresses genericity issue mentioned in #12553. 2. As a consequence, introduce sparse values to FloatTaxonomyFacets, which previously used dense values always. This issue is part of #12576. 3. Compute counts for all taxonomy facets always, which enables us to add an API to retrieve counts for association facets in the future. Addresses #11282. 4. As a consequence of having counts, we can check whether we encountered a label while faceting (count > 0), while previously we relied on the aggregation value to be positive. Closes #12585. 5. Introduce the idea of doing multiple aggregations in one go, with association facets doing the aggregation they were already doing, plus a count. We can extend to an arbitrary number of aggregations, as suggested in #12546. 6. Don't change the API. The only change in behaviour users should notice is the fix for non-positive aggregation values, which were previously discarded. 7. Add tests which were missing for sparse/dense values and non-positive aggregations.
This commit is contained in:
parent
02bc51a753
commit
9ba4af7b88
@ -257,6 +257,9 @@ Improvements
|
||||
* GITHUB#13202: Early terminate graph and exact searches of AbstractKnnVectorQuery to follow timeout set from
|
||||
IndexSearcher#setTimeout(QueryTimeout). (Kaival Parikh)
|
||||
|
||||
* GITHUB#12966: Move most of the responsibility from TaxonomyFacets implementations to TaxonomyFacets itself.
|
||||
This reduces code duplication and enables future development. (Stefan Vodita)
|
||||
|
||||
Optimizations
|
||||
---------------------
|
||||
|
||||
@ -285,6 +288,9 @@ Optimizations
|
||||
* GITHUB#13149: Made PointRangeQuery faster, for some segment sizes, by reducing the amount of virtual calls to
|
||||
IntersectVisitor::visit(int). (Anton Hägerstrand)
|
||||
|
||||
* GITHUB#12966: FloatTaxonomyFacets can now collect values into a sparse structure, like IntTaxonomyFacets already
|
||||
could. (Stefan Vodita)
|
||||
|
||||
Bug Fixes
|
||||
---------------------
|
||||
|
||||
@ -305,6 +311,8 @@ Bug Fixes
|
||||
|
||||
* GITHUB#13206: Subtract deleted file size from the cache size of NRTCachingDirectory. (Jean-François Boeuf)
|
||||
|
||||
* GITHUB#12966: Aggregation facets no longer assume that aggregation values are positive. (Stefan Vodita)
|
||||
|
||||
Build
|
||||
---------------------
|
||||
|
||||
|
@ -180,7 +180,7 @@ public class StringValueFacetCounts extends Facets {
|
||||
|
||||
topN = Math.min(topN, cardinality);
|
||||
TopOrdAndIntQueue q = null;
|
||||
TopOrdAndIntQueue.OrdAndValue reuse = null;
|
||||
TopOrdAndIntQueue.OrdAndInt reuse = null;
|
||||
int bottomCount = 0;
|
||||
int bottomOrd = Integer.MAX_VALUE;
|
||||
int childCount = 0; // total number of labels with non-zero count
|
||||
@ -191,18 +191,18 @@ public class StringValueFacetCounts extends Facets {
|
||||
int ord = cursor.key;
|
||||
int count = cursor.value;
|
||||
if (count > bottomCount || (count == bottomCount && ord < bottomOrd)) {
|
||||
if (reuse == null) {
|
||||
reuse = new TopOrdAndIntQueue.OrdAndValue();
|
||||
}
|
||||
reuse.ord = ord;
|
||||
reuse.value = count;
|
||||
if (q == null) {
|
||||
// Lazy init for sparse case:
|
||||
q = new TopOrdAndIntQueue(topN);
|
||||
}
|
||||
reuse = q.insertWithOverflow(reuse);
|
||||
if (reuse == null) {
|
||||
reuse = (TopOrdAndIntQueue.OrdAndInt) q.newOrdAndValue();
|
||||
}
|
||||
reuse.ord = ord;
|
||||
reuse.value = count;
|
||||
reuse = (TopOrdAndIntQueue.OrdAndInt) q.insertWithOverflow(reuse);
|
||||
if (q.size() == topN) {
|
||||
bottomCount = q.top().value;
|
||||
bottomCount = ((TopOrdAndIntQueue.OrdAndInt) q.top()).value;
|
||||
bottomOrd = q.top().ord;
|
||||
}
|
||||
}
|
||||
@ -213,18 +213,18 @@ public class StringValueFacetCounts extends Facets {
|
||||
if (count != 0) {
|
||||
childCount++;
|
||||
if (count > bottomCount || (count == bottomCount && i < bottomOrd)) {
|
||||
if (reuse == null) {
|
||||
reuse = new TopOrdAndIntQueue.OrdAndValue();
|
||||
}
|
||||
reuse.ord = i;
|
||||
reuse.value = count;
|
||||
if (q == null) {
|
||||
// Lazy init for sparse case:
|
||||
q = new TopOrdAndIntQueue(topN);
|
||||
}
|
||||
reuse = q.insertWithOverflow(reuse);
|
||||
if (reuse == null) {
|
||||
reuse = (TopOrdAndIntQueue.OrdAndInt) q.newOrdAndValue();
|
||||
}
|
||||
reuse.ord = i;
|
||||
reuse.value = count;
|
||||
reuse = (TopOrdAndIntQueue.OrdAndInt) q.insertWithOverflow(reuse);
|
||||
if (q.size() == topN) {
|
||||
bottomCount = q.top().value;
|
||||
bottomCount = ((TopOrdAndIntQueue.OrdAndInt) q.top()).value;
|
||||
bottomOrd = q.top().ord;
|
||||
}
|
||||
}
|
||||
@ -235,7 +235,7 @@ public class StringValueFacetCounts extends Facets {
|
||||
int resultCount = q == null ? 0 : q.size();
|
||||
LabelAndValue[] labelValues = new LabelAndValue[resultCount];
|
||||
for (int i = labelValues.length - 1; i >= 0; i--) {
|
||||
TopOrdAndIntQueue.OrdAndValue ordAndValue = q.pop();
|
||||
TopOrdAndIntQueue.OrdAndInt ordAndValue = (TopOrdAndIntQueue.OrdAndInt) q.pop();
|
||||
final BytesRef term = docValues.lookupOrd(ordAndValue.ord);
|
||||
labelValues[i] = new LabelAndValue(term.utf8ToString(), ordAndValue.value);
|
||||
}
|
||||
|
@ -16,37 +16,42 @@
|
||||
*/
|
||||
package org.apache.lucene.facet;
|
||||
|
||||
import org.apache.lucene.util.PriorityQueue;
|
||||
|
||||
/** Keeps highest results, first by largest float value, then tie break by smallest ord. */
|
||||
public class TopOrdAndFloatQueue extends PriorityQueue<TopOrdAndFloatQueue.OrdAndValue> {
|
||||
|
||||
/** Holds a single entry. */
|
||||
public static final class OrdAndValue {
|
||||
|
||||
/** Ordinal of the entry. */
|
||||
public int ord;
|
||||
|
||||
/** Value associated with the ordinal. */
|
||||
public float value;
|
||||
|
||||
/** Default constructor. */
|
||||
public OrdAndValue() {}
|
||||
}
|
||||
/** Keeps highest results, first by largest float value, then tie-break by smallest ord. */
|
||||
public class TopOrdAndFloatQueue extends TopOrdAndNumberQueue {
|
||||
|
||||
/** Sole constructor. */
|
||||
public TopOrdAndFloatQueue(int topN) {
|
||||
super(topN);
|
||||
}
|
||||
|
||||
/** Holds an ordinal and a float value. */
|
||||
public static final class OrdAndFloat extends OrdAndValue {
|
||||
/** The value corresponding to the ordinal is a float. */
|
||||
public float value;
|
||||
|
||||
/** Default constructor. */
|
||||
public OrdAndFloat() {}
|
||||
|
||||
@Override
|
||||
protected boolean lessThan(OrdAndValue a, OrdAndValue b) {
|
||||
if (a.value < b.value) {
|
||||
public boolean lessThan(OrdAndValue other) {
|
||||
OrdAndFloat otherOrdAndFloat = (OrdAndFloat) other;
|
||||
if (value < otherOrdAndFloat.value) {
|
||||
return true;
|
||||
} else if (a.value > b.value) {
|
||||
return false;
|
||||
} else {
|
||||
return a.ord > b.ord;
|
||||
}
|
||||
if (value > otherOrdAndFloat.value) {
|
||||
return false;
|
||||
}
|
||||
return ord > otherOrdAndFloat.ord;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Number getValue() {
|
||||
return value;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public OrdAndValue newOrdAndValue() {
|
||||
return new OrdAndFloat();
|
||||
}
|
||||
}
|
||||
|
@ -16,37 +16,42 @@
|
||||
*/
|
||||
package org.apache.lucene.facet;
|
||||
|
||||
import org.apache.lucene.util.PriorityQueue;
|
||||
|
||||
/** Keeps highest results, first by largest int value, then tie break by smallest ord. */
|
||||
public class TopOrdAndIntQueue extends PriorityQueue<TopOrdAndIntQueue.OrdAndValue> {
|
||||
|
||||
/** Holds a single entry. */
|
||||
public static final class OrdAndValue {
|
||||
|
||||
/** Ordinal of the entry. */
|
||||
public int ord;
|
||||
|
||||
/** Value associated with the ordinal. */
|
||||
public int value;
|
||||
|
||||
/** Default constructor. */
|
||||
public OrdAndValue() {}
|
||||
}
|
||||
/** Keeps highest results, first by largest int value, then tie-break by smallest ord. */
|
||||
public class TopOrdAndIntQueue extends TopOrdAndNumberQueue {
|
||||
|
||||
/** Sole constructor. */
|
||||
public TopOrdAndIntQueue(int topN) {
|
||||
super(topN);
|
||||
}
|
||||
|
||||
/** Holds an ordinal and an int value. */
|
||||
public static final class OrdAndInt extends OrdAndValue {
|
||||
/** The value corresponding to the ordinal is an int. */
|
||||
public int value;
|
||||
|
||||
/** Default constructor. */
|
||||
public OrdAndInt() {}
|
||||
|
||||
@Override
|
||||
protected boolean lessThan(OrdAndValue a, OrdAndValue b) {
|
||||
if (a.value < b.value) {
|
||||
public boolean lessThan(OrdAndValue other) {
|
||||
OrdAndInt otherOrdAndInt = (OrdAndInt) other;
|
||||
if (value < otherOrdAndInt.value) {
|
||||
return true;
|
||||
} else if (a.value > b.value) {
|
||||
return false;
|
||||
} else {
|
||||
return a.ord > b.ord;
|
||||
}
|
||||
if (value > otherOrdAndInt.value) {
|
||||
return false;
|
||||
}
|
||||
return ord > otherOrdAndInt.ord;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Number getValue() {
|
||||
return value;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public OrdAndValue newOrdAndValue() {
|
||||
return new OrdAndInt();
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,55 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.facet;
|
||||
|
||||
import org.apache.lucene.util.PriorityQueue;
|
||||
|
||||
/** Keeps highest results, first by largest value, then tie-break by smallest ord. */
|
||||
public abstract class TopOrdAndNumberQueue extends PriorityQueue<TopOrdAndNumberQueue.OrdAndValue> {
|
||||
|
||||
/** Holds a single entry. */
|
||||
public abstract static class OrdAndValue {
|
||||
|
||||
/** Ordinal of the entry. */
|
||||
public int ord;
|
||||
|
||||
/** Default constructor. */
|
||||
public OrdAndValue() {}
|
||||
|
||||
/** Compare with another {@link OrdAndValue}. */
|
||||
public abstract boolean lessThan(OrdAndValue other);
|
||||
|
||||
/** Get the value stored in this {@link OrdAndValue}. */
|
||||
public abstract Number getValue();
|
||||
}
|
||||
|
||||
/** Sole constructor. */
|
||||
public TopOrdAndNumberQueue(int topN) {
|
||||
super(topN);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean lessThan(TopOrdAndNumberQueue.OrdAndValue a, TopOrdAndNumberQueue.OrdAndValue b) {
|
||||
return a.lessThan(b);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new {@link org.apache.lucene.facet.TopOrdAndNumberQueue.OrdAndValue} of the
|
||||
* appropriate type.
|
||||
*/
|
||||
public abstract OrdAndValue newOrdAndValue();
|
||||
}
|
@ -327,7 +327,7 @@ abstract class AbstractSortedSetDocValueFacetCounts extends Facets {
|
||||
int pathCount = 0;
|
||||
int childCount = 0;
|
||||
|
||||
TopOrdAndIntQueue.OrdAndValue reuse = null;
|
||||
TopOrdAndIntQueue.OrdAndInt reuse = null;
|
||||
while (childOrds.hasNext()) {
|
||||
int ord = childOrds.next();
|
||||
int count = getCount(ord);
|
||||
@ -335,20 +335,20 @@ abstract class AbstractSortedSetDocValueFacetCounts extends Facets {
|
||||
pathCount += count;
|
||||
childCount++;
|
||||
if (count > bottomCount || (count == bottomCount && ord < bottomOrd)) {
|
||||
if (reuse == null) {
|
||||
reuse = new TopOrdAndIntQueue.OrdAndValue();
|
||||
}
|
||||
reuse.ord = ord;
|
||||
reuse.value = count;
|
||||
if (q == null) {
|
||||
// Lazy init, so we don't create this for the
|
||||
// sparse case unnecessarily
|
||||
q = new TopOrdAndIntQueue(topN);
|
||||
}
|
||||
reuse = q.insertWithOverflow(reuse);
|
||||
if (reuse == null) {
|
||||
reuse = (TopOrdAndIntQueue.OrdAndInt) q.newOrdAndValue();
|
||||
}
|
||||
reuse.ord = ord;
|
||||
reuse.value = count;
|
||||
reuse = (TopOrdAndIntQueue.OrdAndInt) q.insertWithOverflow(reuse);
|
||||
if (q.size() == topN) {
|
||||
bottomCount = q.top().value;
|
||||
bottomOrd = q.top().value;
|
||||
bottomCount = ((TopOrdAndIntQueue.OrdAndInt) q.top()).value;
|
||||
bottomOrd = q.top().ord;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -396,7 +396,7 @@ abstract class AbstractSortedSetDocValueFacetCounts extends Facets {
|
||||
|
||||
LabelAndValue[] labelValues = new LabelAndValue[q.size()];
|
||||
for (int i = labelValues.length - 1; i >= 0; i--) {
|
||||
TopOrdAndIntQueue.OrdAndValue ordAndValue = q.pop();
|
||||
TopOrdAndIntQueue.OrdAndInt ordAndValue = (TopOrdAndIntQueue.OrdAndInt) q.pop();
|
||||
assert ordAndValue != null;
|
||||
final BytesRef term = dv.lookupOrd(ordAndValue.ord);
|
||||
String[] parts = FacetsConfig.stringToPath(term.utf8ToString());
|
||||
|
@ -37,7 +37,7 @@ import org.apache.lucene.util.Bits;
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class FastTaxonomyFacetCounts extends IntTaxonomyFacets {
|
||||
public class FastTaxonomyFacetCounts extends TaxonomyFacets {
|
||||
|
||||
/** Create {@code FastTaxonomyFacetCounts}, which also counts all facet labels. */
|
||||
public FastTaxonomyFacetCounts(TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc)
|
||||
@ -53,7 +53,7 @@ public class FastTaxonomyFacetCounts extends IntTaxonomyFacets {
|
||||
public FastTaxonomyFacetCounts(
|
||||
String indexFieldName, TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc)
|
||||
throws IOException {
|
||||
super(indexFieldName, taxoReader, config, AssociationAggregationFunction.SUM, fc);
|
||||
super(indexFieldName, taxoReader, config, fc);
|
||||
count(fc.getMatchingDocs());
|
||||
}
|
||||
|
||||
@ -65,7 +65,7 @@ public class FastTaxonomyFacetCounts extends IntTaxonomyFacets {
|
||||
public FastTaxonomyFacetCounts(
|
||||
String indexFieldName, IndexReader reader, TaxonomyReader taxoReader, FacetsConfig config)
|
||||
throws IOException {
|
||||
super(indexFieldName, taxoReader, config, AssociationAggregationFunction.SUM, null);
|
||||
super(indexFieldName, taxoReader, config, null);
|
||||
countAll(reader);
|
||||
}
|
||||
|
||||
@ -88,26 +88,26 @@ public class FastTaxonomyFacetCounts extends IntTaxonomyFacets {
|
||||
ConjunctionUtils.intersectIterators(Arrays.asList(hits.bits.iterator(), valuesIt));
|
||||
|
||||
if (singleValued != null) {
|
||||
if (values != null) {
|
||||
if (counts != null) {
|
||||
while (it.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
values[(int) singleValued.longValue()]++;
|
||||
counts[(int) singleValued.longValue()]++;
|
||||
}
|
||||
} else {
|
||||
while (it.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
sparseValues.addTo((int) singleValued.longValue(), 1);
|
||||
sparseCounts.addTo((int) singleValued.longValue(), 1);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (values != null) {
|
||||
if (counts != null) {
|
||||
while (it.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
for (int i = 0; i < multiValued.docValueCount(); i++) {
|
||||
values[(int) multiValued.nextValue()]++;
|
||||
counts[(int) multiValued.nextValue()]++;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
while (it.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
for (int i = 0; i < multiValued.docValueCount(); i++) {
|
||||
sparseValues.addTo((int) multiValued.nextValue(), 1);
|
||||
sparseCounts.addTo((int) multiValued.nextValue(), 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -125,7 +125,7 @@ public class FastTaxonomyFacetCounts extends IntTaxonomyFacets {
|
||||
continue;
|
||||
}
|
||||
initializeValueCounters();
|
||||
assert values != null;
|
||||
assert counts != null;
|
||||
|
||||
Bits liveDocs = context.reader().getLiveDocs();
|
||||
|
||||
@ -135,7 +135,7 @@ public class FastTaxonomyFacetCounts extends IntTaxonomyFacets {
|
||||
for (int doc = singleValued.nextDoc();
|
||||
doc != DocIdSetIterator.NO_MORE_DOCS;
|
||||
doc = singleValued.nextDoc()) {
|
||||
values[(int) singleValued.longValue()]++;
|
||||
counts[(int) singleValued.longValue()]++;
|
||||
}
|
||||
} else {
|
||||
for (int doc = singleValued.nextDoc();
|
||||
@ -144,7 +144,7 @@ public class FastTaxonomyFacetCounts extends IntTaxonomyFacets {
|
||||
if (liveDocs.get(doc) == false) {
|
||||
continue;
|
||||
}
|
||||
values[(int) singleValued.longValue()]++;
|
||||
counts[(int) singleValued.longValue()]++;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
@ -153,7 +153,7 @@ public class FastTaxonomyFacetCounts extends IntTaxonomyFacets {
|
||||
doc != DocIdSetIterator.NO_MORE_DOCS;
|
||||
doc = multiValued.nextDoc()) {
|
||||
for (int i = 0; i < multiValued.docValueCount(); i++) {
|
||||
values[(int) multiValued.nextValue()]++;
|
||||
counts[(int) multiValued.nextValue()]++;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
@ -164,7 +164,7 @@ public class FastTaxonomyFacetCounts extends IntTaxonomyFacets {
|
||||
continue;
|
||||
}
|
||||
for (int i = 0; i < multiValued.docValueCount(); i++) {
|
||||
values[(int) multiValued.nextValue()]++;
|
||||
counts[(int) multiValued.nextValue()]++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -16,33 +16,25 @@
|
||||
*/
|
||||
package org.apache.lucene.facet.taxonomy;
|
||||
|
||||
import com.carrotsearch.hppc.FloatArrayList;
|
||||
import com.carrotsearch.hppc.IntArrayList;
|
||||
import com.carrotsearch.hppc.IntFloatHashMap;
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import org.apache.lucene.facet.FacetResult;
|
||||
import org.apache.lucene.facet.FacetsCollector;
|
||||
import org.apache.lucene.facet.FacetsConfig;
|
||||
import org.apache.lucene.facet.FacetsConfig.DimConfig;
|
||||
import org.apache.lucene.facet.LabelAndValue;
|
||||
import org.apache.lucene.facet.TopOrdAndFloatQueue;
|
||||
import org.apache.lucene.util.PriorityQueue;
|
||||
import org.apache.lucene.facet.TopOrdAndNumberQueue;
|
||||
|
||||
/** Base class for all taxonomy-based facets that aggregate to a per-ords float[]. */
|
||||
/** Base class for all taxonomy-based facets that aggregate to float. */
|
||||
abstract class FloatTaxonomyFacets extends TaxonomyFacets {
|
||||
|
||||
// TODO: also use native hash map for sparse collection, like IntTaxonomyFacets
|
||||
|
||||
/** Aggregation function used for combining values. */
|
||||
final AssociationAggregationFunction aggregationFunction;
|
||||
protected final AssociationAggregationFunction aggregationFunction;
|
||||
|
||||
/** Per-ordinal value. */
|
||||
/** Dense ordinal values. */
|
||||
float[] values;
|
||||
|
||||
/** Sparse ordinal values. */
|
||||
IntFloatHashMap sparseValues;
|
||||
|
||||
/** Sole constructor. */
|
||||
FloatTaxonomyFacets(
|
||||
String indexFieldName,
|
||||
@ -53,354 +45,107 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets {
|
||||
throws IOException {
|
||||
super(indexFieldName, taxoReader, config, fc);
|
||||
this.aggregationFunction = aggregationFunction;
|
||||
valueComparator = (o1, o2) -> Float.compare(o1.floatValue(), o2.floatValue());
|
||||
}
|
||||
|
||||
@Override
|
||||
boolean hasValues() {
|
||||
return values != null;
|
||||
protected void initializeValueCounters() {
|
||||
if (initialized) {
|
||||
return;
|
||||
}
|
||||
super.initializeValueCounters();
|
||||
|
||||
void initializeValueCounters() {
|
||||
if (values == null) {
|
||||
assert sparseValues == null && values == null;
|
||||
if (sparseCounts != null) {
|
||||
sparseValues = new IntFloatHashMap();
|
||||
} else {
|
||||
values = new float[taxoReader.getSize()];
|
||||
}
|
||||
}
|
||||
|
||||
/** Rolls up any single-valued hierarchical dimensions. */
|
||||
void rollup() throws IOException {
|
||||
if (values == null) {
|
||||
return;
|
||||
/** Set the value associated with this ordinal to {@code newValue}. */
|
||||
void setValue(int ordinal, float newValue) {
|
||||
if (sparseValues != null) {
|
||||
sparseValues.put(ordinal, newValue);
|
||||
} else {
|
||||
values[ordinal] = newValue;
|
||||
}
|
||||
}
|
||||
|
||||
// Rollup any necessary dims:
|
||||
ParallelTaxonomyArrays.IntArray children = getChildren();
|
||||
for (Map.Entry<String, DimConfig> ent : config.getDimConfigs().entrySet()) {
|
||||
String dim = ent.getKey();
|
||||
DimConfig ft = ent.getValue();
|
||||
if (ft.hierarchical && ft.multiValued == false) {
|
||||
int dimRootOrd = taxoReader.getOrdinal(new FacetLabel(dim));
|
||||
assert dimRootOrd > 0;
|
||||
float newValue =
|
||||
aggregationFunction.aggregate(values[dimRootOrd], rollup(children.get(dimRootOrd)));
|
||||
values[dimRootOrd] = newValue;
|
||||
/** Get the value associated with this ordinal. */
|
||||
float getValue(int ordinal) {
|
||||
if (sparseValues != null) {
|
||||
return sparseValues.get(ordinal);
|
||||
} else {
|
||||
return values[ordinal];
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Number getAggregationValue(int ordinal) {
|
||||
return getValue(ordinal);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Number aggregate(Number existingVal, Number newVal) {
|
||||
return aggregationFunction.aggregate(existingVal.floatValue(), newVal.floatValue());
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void updateValueFromRollup(int ordinal, int childOrdinal) throws IOException {
|
||||
super.updateValueFromRollup(ordinal, childOrdinal);
|
||||
float currentValue = getValue(ordinal);
|
||||
float newValue = aggregationFunction.aggregate(currentValue, rollup(childOrdinal));
|
||||
setValue(ordinal, newValue);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected TopOrdAndNumberQueue makeTopOrdAndNumberQueue(int topN) {
|
||||
return new TopOrdAndFloatQueue(Math.min(taxoReader.getSize(), topN));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Number missingAggregationValue() {
|
||||
return -1f;
|
||||
}
|
||||
|
||||
private float rollup(int ord) throws IOException {
|
||||
ParallelTaxonomyArrays.IntArray children = getChildren();
|
||||
ParallelTaxonomyArrays.IntArray siblings = getSiblings();
|
||||
float aggregationValue = 0f;
|
||||
float aggregatedValue = 0f;
|
||||
while (ord != TaxonomyReader.INVALID_ORDINAL) {
|
||||
float childValue = aggregationFunction.aggregate(values[ord], rollup(children.get(ord)));
|
||||
values[ord] = childValue;
|
||||
aggregationValue = aggregationFunction.aggregate(aggregationValue, childValue);
|
||||
updateValueFromRollup(ord, children.get(ord));
|
||||
aggregatedValue = aggregationFunction.aggregate(aggregatedValue, getValue(ord));
|
||||
ord = siblings.get(ord);
|
||||
}
|
||||
return aggregationValue;
|
||||
return aggregatedValue;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Number getSpecificValue(String dim, String... path) throws IOException {
|
||||
DimConfig dimConfig = verifyDim(dim);
|
||||
if (path.length == 0) {
|
||||
if (dimConfig.hierarchical && dimConfig.multiValued == false) {
|
||||
// ok: rolled up at search time
|
||||
} else if (dimConfig.requireDimCount && dimConfig.multiValued) {
|
||||
// ok: we indexed all ords at index time
|
||||
} else {
|
||||
throw new IllegalArgumentException(
|
||||
"cannot return dimension-level value alone; use getTopChildren instead");
|
||||
}
|
||||
}
|
||||
int ord = taxoReader.getOrdinal(new FacetLabel(dim, path));
|
||||
if (ord < 0) {
|
||||
return -1;
|
||||
}
|
||||
return values == null ? 0 : values[ord];
|
||||
protected void setIncomingValue(TopOrdAndNumberQueue.OrdAndValue incomingOrdAndValue, int ord) {
|
||||
((TopOrdAndFloatQueue.OrdAndFloat) incomingOrdAndValue).value = getValue(ord);
|
||||
}
|
||||
|
||||
@Override
|
||||
public FacetResult getAllChildren(String dim, String... path) throws IOException {
|
||||
DimConfig dimConfig = verifyDim(dim);
|
||||
FacetLabel cp = new FacetLabel(dim, path);
|
||||
int dimOrd = taxoReader.getOrdinal(cp);
|
||||
if (dimOrd == -1) {
|
||||
return null;
|
||||
}
|
||||
protected class FloatAggregatedValue extends AggregatedValue {
|
||||
private float value;
|
||||
|
||||
if (values == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
ParallelTaxonomyArrays.IntArray children = getChildren();
|
||||
ParallelTaxonomyArrays.IntArray siblings = getSiblings();
|
||||
|
||||
int ord = children.get(dimOrd);
|
||||
float aggregatedValue = 0;
|
||||
|
||||
IntArrayList ordinals = new IntArrayList();
|
||||
FloatArrayList ordValues = new FloatArrayList();
|
||||
|
||||
while (ord != TaxonomyReader.INVALID_ORDINAL) {
|
||||
if (values[ord] > 0) {
|
||||
aggregatedValue = aggregationFunction.aggregate(aggregatedValue, values[ord]);
|
||||
ordinals.add(ord);
|
||||
ordValues.add(values[ord]);
|
||||
}
|
||||
ord = siblings.get(ord);
|
||||
}
|
||||
|
||||
if (aggregatedValue == 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (dimConfig.multiValued) {
|
||||
if (dimConfig.requireDimCount) {
|
||||
aggregatedValue = values[dimOrd];
|
||||
} else {
|
||||
// Our sum'd count is not correct, in general:
|
||||
aggregatedValue = -1;
|
||||
}
|
||||
} else {
|
||||
// Our sum'd dim count is accurate, so we keep it
|
||||
}
|
||||
|
||||
// TODO: It would be nice if TaxonomyReader let us pass in a buffer + size so we didn't have to
|
||||
// do an array copy here:
|
||||
FacetLabel[] bulkPath = taxoReader.getBulkPath(ordinals.toArray());
|
||||
|
||||
LabelAndValue[] labelValues = new LabelAndValue[ordValues.size()];
|
||||
for (int i = 0; i < labelValues.length; i++) {
|
||||
labelValues[i] = new LabelAndValue(bulkPath[i].components[cp.length], ordValues.get(i));
|
||||
}
|
||||
return new FacetResult(dim, path, aggregatedValue, labelValues, ordinals.size());
|
||||
}
|
||||
|
||||
@Override
|
||||
public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException {
|
||||
validateTopN(topN);
|
||||
DimConfig dimConfig = verifyDim(dim);
|
||||
FacetLabel cp = new FacetLabel(dim, path);
|
||||
int dimOrd = taxoReader.getOrdinal(cp);
|
||||
if (dimOrd == -1) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (values == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
TopChildrenForPath topChildrenForPath = getTopChildrenForPath(dimConfig, dimOrd, topN);
|
||||
return createFacetResult(topChildrenForPath, dim, path);
|
||||
}
|
||||
|
||||
/**
|
||||
* Determine the top-n children for a specified dimension + path. Results are in an intermediate
|
||||
* form.
|
||||
*/
|
||||
private TopChildrenForPath getTopChildrenForPath(DimConfig dimConfig, int pathOrd, int topN)
|
||||
throws IOException {
|
||||
|
||||
TopOrdAndFloatQueue q = new TopOrdAndFloatQueue(Math.min(taxoReader.getSize(), topN));
|
||||
float bottomValue = 0;
|
||||
int bottomOrd = Integer.MAX_VALUE;
|
||||
|
||||
ParallelTaxonomyArrays.IntArray children = getChildren();
|
||||
ParallelTaxonomyArrays.IntArray siblings = getSiblings();
|
||||
|
||||
int ord = children.get(pathOrd);
|
||||
float aggregatedValue = 0;
|
||||
int childCount = 0;
|
||||
|
||||
TopOrdAndFloatQueue.OrdAndValue reuse = null;
|
||||
while (ord != TaxonomyReader.INVALID_ORDINAL) {
|
||||
float value = values[ord];
|
||||
if (value > 0) {
|
||||
aggregatedValue = aggregationFunction.aggregate(aggregatedValue, value);
|
||||
childCount++;
|
||||
if (value > bottomValue || (value == bottomValue && ord < bottomOrd)) {
|
||||
if (reuse == null) {
|
||||
reuse = new TopOrdAndFloatQueue.OrdAndValue();
|
||||
}
|
||||
reuse.ord = ord;
|
||||
reuse.value = value;
|
||||
reuse = q.insertWithOverflow(reuse);
|
||||
if (q.size() == topN) {
|
||||
bottomValue = q.top().value;
|
||||
bottomOrd = q.top().ord;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ord = siblings.get(ord);
|
||||
}
|
||||
|
||||
if (dimConfig.multiValued) {
|
||||
if (dimConfig.requireDimCount) {
|
||||
aggregatedValue = values[pathOrd];
|
||||
} else {
|
||||
// Our sum'd count is not correct, in general:
|
||||
aggregatedValue = -1;
|
||||
}
|
||||
}
|
||||
return new TopChildrenForPath(aggregatedValue, childCount, q);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a FacetResult for the provided dim + path and intermediate results. Does the extra work
|
||||
* of resolving ordinals -> labels, etc. Will return null if there are no children.
|
||||
*/
|
||||
FacetResult createFacetResult(TopChildrenForPath topChildrenForPath, String dim, String... path)
|
||||
throws IOException {
|
||||
// If the intermediate result is null or there are no children, we return null:
|
||||
if (topChildrenForPath == null || topChildrenForPath.childCount == 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
TopOrdAndFloatQueue q = topChildrenForPath.childQueue;
|
||||
assert q != null;
|
||||
|
||||
LabelAndValue[] labelValues = new LabelAndValue[q.size()];
|
||||
int[] ordinals = new int[labelValues.length];
|
||||
float[] values = new float[labelValues.length];
|
||||
|
||||
for (int i = labelValues.length - 1; i >= 0; i--) {
|
||||
TopOrdAndFloatQueue.OrdAndValue ordAndValue = q.pop();
|
||||
assert ordAndValue != null;
|
||||
ordinals[i] = ordAndValue.ord;
|
||||
values[i] = ordAndValue.value;
|
||||
}
|
||||
|
||||
FacetLabel[] bulkPath = taxoReader.getBulkPath(ordinals);
|
||||
// The path component we're interested in is the one immediately after the provided path. We
|
||||
// add 1 here to also account for the dim:
|
||||
int childComponentIdx = path.length + 1;
|
||||
for (int i = 0; i < labelValues.length; i++) {
|
||||
labelValues[i] = new LabelAndValue(bulkPath[i].components[childComponentIdx], values[i]);
|
||||
}
|
||||
|
||||
return new FacetResult(
|
||||
dim, path, topChildrenForPath.pathValue, labelValues, topChildrenForPath.childCount);
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<FacetResult> getTopDims(int topNDims, int topNChildren) throws IOException {
|
||||
validateTopN(topNDims);
|
||||
validateTopN(topNChildren);
|
||||
|
||||
if (values == null) {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
|
||||
// get existing children and siblings ordinal array from TaxonomyFacets
|
||||
ParallelTaxonomyArrays.IntArray children = getChildren();
|
||||
ParallelTaxonomyArrays.IntArray siblings = getSiblings();
|
||||
|
||||
// Create priority queue to store top dimensions and sort by their aggregated values/hits and
|
||||
// string values.
|
||||
PriorityQueue<DimValue> pq =
|
||||
new PriorityQueue<>(topNDims) {
|
||||
@Override
|
||||
protected boolean lessThan(DimValue a, DimValue b) {
|
||||
if (a.value > b.value) {
|
||||
return false;
|
||||
} else if (a.value < b.value) {
|
||||
return true;
|
||||
} else {
|
||||
return a.dim.compareTo(b.dim) > 0;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Keep track of intermediate results, if we compute them, so we can reuse them later:
|
||||
Map<String, TopChildrenForPath> intermediateResults = null;
|
||||
|
||||
// iterate over children and siblings ordinals for all dims
|
||||
int ord = children.get(TaxonomyReader.ROOT_ORDINAL);
|
||||
while (ord != TaxonomyReader.INVALID_ORDINAL) {
|
||||
String dim = taxoReader.getPath(ord).components[0];
|
||||
FacetsConfig.DimConfig dimConfig = config.getDimConfig(dim);
|
||||
if (dimConfig.indexFieldName.equals(indexFieldName)) {
|
||||
FacetLabel cp = new FacetLabel(dim);
|
||||
int dimOrd = taxoReader.getOrdinal(cp);
|
||||
if (dimOrd != -1) {
|
||||
float dimValue;
|
||||
if (dimConfig.multiValued) {
|
||||
if (dimConfig.requireDimCount) {
|
||||
// If the dim is configured as multi-valued and requires dim counts, we can access
|
||||
// an accurate count for the dim computed at indexing time:
|
||||
dimValue = values[dimOrd];
|
||||
} else {
|
||||
// If the dim is configured as multi-valued but not requiring dim counts, we cannot
|
||||
// compute an accurate dim count, and use -1 as a place-holder:
|
||||
dimValue = -1;
|
||||
}
|
||||
} else {
|
||||
// Single-valued dims require aggregating descendant paths to get accurate dim counts
|
||||
// since we don't directly access ancestry paths:
|
||||
// TODO: We could consider indexing dim counts directly if getTopDims is a common
|
||||
// use-case.
|
||||
TopChildrenForPath topChildrenForPath =
|
||||
getTopChildrenForPath(dimConfig, dimOrd, topNChildren);
|
||||
if (intermediateResults == null) {
|
||||
intermediateResults = new HashMap<>();
|
||||
}
|
||||
intermediateResults.put(dim, topChildrenForPath);
|
||||
dimValue = topChildrenForPath.pathValue;
|
||||
}
|
||||
if (dimValue != 0) {
|
||||
if (pq.size() < topNDims) {
|
||||
pq.add(new DimValue(dim, dimOrd, dimValue));
|
||||
} else {
|
||||
if (dimValue > pq.top().value
|
||||
|| (dimValue == pq.top().value && dim.compareTo(pq.top().dim) < 0)) {
|
||||
DimValue bottomDim = pq.top();
|
||||
bottomDim.dim = dim;
|
||||
bottomDim.value = dimValue;
|
||||
pq.updateTop();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
ord = siblings.get(ord);
|
||||
}
|
||||
|
||||
FacetResult[] results = new FacetResult[pq.size()];
|
||||
|
||||
while (pq.size() > 0) {
|
||||
DimValue dimValue = pq.pop();
|
||||
assert dimValue != null;
|
||||
String dim = dimValue.dim;
|
||||
TopChildrenForPath topChildrenForPath = null;
|
||||
if (intermediateResults != null) {
|
||||
topChildrenForPath = intermediateResults.get(dim);
|
||||
}
|
||||
if (topChildrenForPath == null) {
|
||||
FacetsConfig.DimConfig dimConfig = config.getDimConfig(dim);
|
||||
topChildrenForPath = getTopChildrenForPath(dimConfig, dimValue.dimOrd, topNChildren);
|
||||
}
|
||||
FacetResult facetResult = createFacetResult(topChildrenForPath, dim);
|
||||
assert facetResult != null;
|
||||
results[pq.size()] = facetResult;
|
||||
}
|
||||
return Arrays.asList(results);
|
||||
}
|
||||
|
||||
private static class DimValue {
|
||||
String dim;
|
||||
int dimOrd;
|
||||
float value;
|
||||
|
||||
DimValue(String dim, int dimOrd, float value) {
|
||||
this.dim = dim;
|
||||
this.dimOrd = dimOrd;
|
||||
public FloatAggregatedValue(float value) {
|
||||
this.value = value;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void aggregate(int ord) {
|
||||
value = aggregationFunction.aggregate(value, getValue(ord));
|
||||
}
|
||||
|
||||
/** Intermediate result to store top children for a given path before resolving labels, etc. */
|
||||
private record TopChildrenForPath(
|
||||
float pathValue, int childCount, TopOrdAndFloatQueue childQueue) {}
|
||||
@Override
|
||||
public Number get() {
|
||||
return value;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected AggregatedValue newAggregatedValue() {
|
||||
return new FloatAggregatedValue(0f);
|
||||
}
|
||||
}
|
||||
|
@ -16,29 +16,19 @@
|
||||
*/
|
||||
package org.apache.lucene.facet.taxonomy;
|
||||
|
||||
import com.carrotsearch.hppc.IntArrayList;
|
||||
import com.carrotsearch.hppc.IntIntHashMap;
|
||||
import com.carrotsearch.hppc.cursors.IntIntCursor;
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import org.apache.lucene.facet.FacetResult;
|
||||
import java.util.Comparator;
|
||||
import org.apache.lucene.facet.FacetsCollector;
|
||||
import org.apache.lucene.facet.FacetsCollector.MatchingDocs;
|
||||
import org.apache.lucene.facet.FacetsConfig;
|
||||
import org.apache.lucene.facet.FacetsConfig.DimConfig;
|
||||
import org.apache.lucene.facet.LabelAndValue;
|
||||
import org.apache.lucene.facet.TopOrdAndIntQueue;
|
||||
import org.apache.lucene.util.PriorityQueue;
|
||||
import org.apache.lucene.facet.TopOrdAndNumberQueue;
|
||||
|
||||
/** Base class for all taxonomy-based facets that aggregate to a per-ords int[]. */
|
||||
/** Base class for all taxonomy-based facets that aggregate to int. */
|
||||
abstract class IntTaxonomyFacets extends TaxonomyFacets {
|
||||
|
||||
/** Aggregation function used for combining values. */
|
||||
final AssociationAggregationFunction aggregationFunction;
|
||||
protected final AssociationAggregationFunction aggregationFunction;
|
||||
|
||||
/** Dense ordinal values. */
|
||||
int[] values;
|
||||
@ -46,9 +36,6 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
|
||||
/** Sparse ordinal values. */
|
||||
IntIntHashMap sparseValues;
|
||||
|
||||
/** Have value counters been initialized. */
|
||||
boolean initialized;
|
||||
|
||||
/** Sole constructor. */
|
||||
IntTaxonomyFacets(
|
||||
String indexFieldName,
|
||||
@ -59,27 +46,25 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
|
||||
throws IOException {
|
||||
super(indexFieldName, taxoReader, config, fc);
|
||||
this.aggregationFunction = aggregationFunction;
|
||||
valueComparator = Comparator.comparingInt(o -> (int) o);
|
||||
}
|
||||
|
||||
// Reports the initialized flag, which initializeValueCounters() sets to true on its first call.
@Override
|
||||
boolean hasValues() {
|
||||
return initialized;
|
||||
}
|
||||
|
||||
void initializeValueCounters() {
|
||||
protected void initializeValueCounters() {
|
||||
if (initialized) {
|
||||
return;
|
||||
}
|
||||
initialized = true;
|
||||
super.initializeValueCounters();
|
||||
|
||||
assert sparseValues == null && values == null;
|
||||
if (useHashTable(fc, taxoReader)) {
|
||||
if (sparseCounts != null) {
|
||||
sparseValues = new IntIntHashMap();
|
||||
} else {
|
||||
values = new int[taxoReader.getSize()];
|
||||
}
|
||||
}
|
||||
|
||||
/** Set the count for this ordinal to {@code newValue}. */
|
||||
/** Set the value associated with this ordinal to {@code newValue}. */
|
||||
void setValue(int ordinal, int newValue) {
|
||||
if (sparseValues != null) {
|
||||
sparseValues.put(ordinal, newValue);
|
||||
@ -88,7 +73,7 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
|
||||
}
|
||||
}
|
||||
|
||||
/** Get the count for this ordinal. */
|
||||
/** Get the value associated with this ordinal. */
|
||||
int getValue(int ordinal) {
|
||||
if (sparseValues != null) {
|
||||
return sparseValues.get(ordinal);
|
||||
@ -97,33 +82,22 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
|
||||
}
|
||||
}
|
||||
|
||||
/** Rolls up any single-valued hierarchical dimensions. */
|
||||
void rollup() throws IOException {
|
||||
if (initialized == false) {
|
||||
return;
|
||||
@Override
|
||||
protected Number getAggregationValue(int ordinal) {
|
||||
return getValue(ordinal);
|
||||
}
|
||||
|
||||
// Rollup any necessary dims:
|
||||
ParallelTaxonomyArrays.IntArray children = null;
|
||||
for (Map.Entry<String, DimConfig> ent : config.getDimConfigs().entrySet()) {
|
||||
String dim = ent.getKey();
|
||||
DimConfig ft = ent.getValue();
|
||||
if (ft.hierarchical && ft.multiValued == false) {
|
||||
int dimRootOrd = taxoReader.getOrdinal(new FacetLabel(dim));
|
||||
// It can be -1 if this field was declared in the
|
||||
// config but never indexed:
|
||||
if (dimRootOrd > 0) {
|
||||
if (children == null) {
|
||||
// lazy init
|
||||
children = getChildren();
|
||||
}
|
||||
int currentValue = getValue(dimRootOrd);
|
||||
int newValue =
|
||||
aggregationFunction.aggregate(currentValue, rollup(children.get(dimRootOrd)));
|
||||
setValue(dimRootOrd, newValue);
|
||||
}
|
||||
}
|
||||
@Override
|
||||
protected Number aggregate(Number existingVal, Number newVal) {
|
||||
return aggregationFunction.aggregate((int) existingVal, (int) newVal);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void updateValueFromRollup(int ordinal, int childOrdinal) throws IOException {
|
||||
super.updateValueFromRollup(ordinal, childOrdinal);
|
||||
int currentValue = getValue(ordinal);
|
||||
int newValue = aggregationFunction.aggregate(currentValue, rollup(childOrdinal));
|
||||
setValue(ordinal, newValue);
|
||||
}
|
||||
|
||||
private int rollup(int ord) throws IOException {
|
||||
@ -131,374 +105,38 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
|
||||
ParallelTaxonomyArrays.IntArray siblings = getSiblings();
|
||||
int aggregatedValue = 0;
|
||||
while (ord != TaxonomyReader.INVALID_ORDINAL) {
|
||||
int currentValue = getValue(ord);
|
||||
int newValue = aggregationFunction.aggregate(currentValue, rollup(children.get(ord)));
|
||||
setValue(ord, newValue);
|
||||
updateValueFromRollup(ord, children.get(ord));
|
||||
aggregatedValue = aggregationFunction.aggregate(aggregatedValue, getValue(ord));
|
||||
ord = siblings.get(ord);
|
||||
}
|
||||
return aggregatedValue;
|
||||
}
|
||||
|
||||
/** Return true if a sparse hash table should be used for counting, instead of a dense int[]. */
|
||||
private boolean useHashTable(FacetsCollector fc, TaxonomyReader taxoReader) {
|
||||
if (taxoReader.getSize() < 1024) {
|
||||
// small number of unique values: use an array
|
||||
return false;
|
||||
}
|
||||
|
||||
if (fc == null) {
|
||||
// counting all docs: use an array
|
||||
return false;
|
||||
}
|
||||
|
||||
int maxDoc = 0;
|
||||
int sumTotalHits = 0;
|
||||
for (MatchingDocs docs : fc.getMatchingDocs()) {
|
||||
sumTotalHits += docs.totalHits;
|
||||
maxDoc += docs.context.reader().maxDoc();
|
||||
}
|
||||
|
||||
// if our result set is < 10% of the index, we collect sparsely (use hash map):
|
||||
return sumTotalHits < maxDoc / 10;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Number getSpecificValue(String dim, String... path) throws IOException {
|
||||
DimConfig dimConfig = verifyDim(dim);
|
||||
if (path.length == 0) {
|
||||
if (dimConfig.hierarchical && dimConfig.multiValued == false) {
|
||||
// ok: rolled up at search time
|
||||
} else if (dimConfig.requireDimCount && dimConfig.multiValued) {
|
||||
// ok: we indexed all ords at index time
|
||||
} else {
|
||||
throw new IllegalArgumentException(
|
||||
"cannot return dimension-level value alone; use getTopChildren instead");
|
||||
}
|
||||
}
|
||||
int ord = taxoReader.getOrdinal(new FacetLabel(dim, path));
|
||||
if (ord < 0) {
|
||||
return -1;
|
||||
}
|
||||
return initialized ? getValue(ord) : 0;
|
||||
protected void setIncomingValue(TopOrdAndNumberQueue.OrdAndValue incomingOrdAndValue, int ord) {
|
||||
((TopOrdAndIntQueue.OrdAndInt) incomingOrdAndValue).value = getValue(ord);
|
||||
}
|
||||
|
||||
@Override
|
||||
public FacetResult getAllChildren(String dim, String... path) throws IOException {
|
||||
DimConfig dimConfig = verifyDim(dim);
|
||||
FacetLabel cp = new FacetLabel(dim, path);
|
||||
int dimOrd = taxoReader.getOrdinal(cp);
|
||||
if (dimOrd == -1) {
|
||||
return null;
|
||||
}
|
||||
protected class IntAggregatedValue extends AggregatedValue {
|
||||
private int value;
|
||||
|
||||
if (initialized == false) {
|
||||
return null;
|
||||
}
|
||||
|
||||
int aggregatedValue = 0;
|
||||
|
||||
IntArrayList ordinals = new IntArrayList();
|
||||
IntArrayList ordValues = new IntArrayList();
|
||||
|
||||
if (sparseValues != null) {
|
||||
for (IntIntCursor c : sparseValues) {
|
||||
int value = c.value;
|
||||
int ord = c.key;
|
||||
if (parents.get(ord) == dimOrd && value > 0) {
|
||||
aggregatedValue = aggregationFunction.aggregate(aggregatedValue, value);
|
||||
ordinals.add(ord);
|
||||
ordValues.add(value);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
ParallelTaxonomyArrays.IntArray children = getChildren();
|
||||
ParallelTaxonomyArrays.IntArray siblings = getSiblings();
|
||||
int ord = children.get(dimOrd);
|
||||
while (ord != TaxonomyReader.INVALID_ORDINAL) {
|
||||
int value = values[ord];
|
||||
if (value > 0) {
|
||||
aggregatedValue = aggregationFunction.aggregate(aggregatedValue, value);
|
||||
ordinals.add(ord);
|
||||
ordValues.add(value);
|
||||
}
|
||||
ord = siblings.get(ord);
|
||||
}
|
||||
}
|
||||
|
||||
if (aggregatedValue == 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (dimConfig.multiValued) {
|
||||
if (dimConfig.requireDimCount) {
|
||||
aggregatedValue = getValue(dimOrd);
|
||||
} else {
|
||||
// Our sum'd value is not correct, in general:
|
||||
aggregatedValue = -1;
|
||||
}
|
||||
} else {
|
||||
// Our sum'd dim value is accurate, so we keep it
|
||||
}
|
||||
|
||||
// TODO: It would be nice if TaxonomyReader let us pass in a buffer + size so we didn't have to
|
||||
// do an array copy here:
|
||||
FacetLabel[] bulkPath = taxoReader.getBulkPath(ordinals.toArray());
|
||||
|
||||
LabelAndValue[] labelValues = new LabelAndValue[ordValues.size()];
|
||||
for (int i = 0; i < ordValues.size(); i++) {
|
||||
labelValues[i] = new LabelAndValue(bulkPath[i].components[cp.length], ordValues.get(i));
|
||||
}
|
||||
return new FacetResult(dim, path, aggregatedValue, labelValues, ordinals.size());
|
||||
}
|
||||
|
||||
@Override
|
||||
public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException {
|
||||
validateTopN(topN);
|
||||
DimConfig dimConfig = verifyDim(dim);
|
||||
FacetLabel cp = new FacetLabel(dim, path);
|
||||
int dimOrd = taxoReader.getOrdinal(cp);
|
||||
if (dimOrd == -1) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (initialized == false) {
|
||||
return null;
|
||||
}
|
||||
|
||||
TopChildrenForPath topChildrenForPath = getTopChildrenForPath(dimConfig, dimOrd, topN);
|
||||
return createFacetResult(topChildrenForPath, dim, path);
|
||||
}
|
||||
|
||||
/**
|
||||
* Determine the top-n children for a specified dimension + path. Results are in an intermediate
|
||||
* form.
|
||||
*/
|
||||
private TopChildrenForPath getTopChildrenForPath(DimConfig dimConfig, int pathOrd, int topN)
|
||||
throws IOException {
|
||||
TopOrdAndIntQueue q = new TopOrdAndIntQueue(Math.min(taxoReader.getSize(), topN));
|
||||
int bottomValue = 0;
|
||||
int bottomOrd = Integer.MAX_VALUE;
|
||||
|
||||
int aggregatedValue = 0;
|
||||
int childCount = 0;
|
||||
TopOrdAndIntQueue.OrdAndValue reuse = null;
|
||||
|
||||
// TODO: would be faster if we had a "get the following children" API? then we
|
||||
// can make a single pass over the hashmap
|
||||
if (sparseValues != null) {
|
||||
for (IntIntCursor c : sparseValues) {
|
||||
int value = c.value;
|
||||
int ord = c.key;
|
||||
if (parents.get(ord) == pathOrd && value > 0) {
|
||||
aggregatedValue = aggregationFunction.aggregate(aggregatedValue, value);
|
||||
childCount++;
|
||||
if (value > bottomValue || (value == bottomValue && ord < bottomOrd)) {
|
||||
if (reuse == null) {
|
||||
reuse = new TopOrdAndIntQueue.OrdAndValue();
|
||||
}
|
||||
reuse.ord = ord;
|
||||
reuse.value = value;
|
||||
reuse = q.insertWithOverflow(reuse);
|
||||
if (q.size() == topN) {
|
||||
bottomValue = q.top().value;
|
||||
bottomOrd = q.top().ord;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
ParallelTaxonomyArrays.IntArray children = getChildren();
|
||||
ParallelTaxonomyArrays.IntArray siblings = getSiblings();
|
||||
int ord = children.get(pathOrd);
|
||||
while (ord != TaxonomyReader.INVALID_ORDINAL) {
|
||||
int value = values[ord];
|
||||
if (value > 0) {
|
||||
aggregatedValue = aggregationFunction.aggregate(aggregatedValue, value);
|
||||
childCount++;
|
||||
if (value > bottomValue || (value == bottomValue && ord < bottomOrd)) {
|
||||
if (reuse == null) {
|
||||
reuse = new TopOrdAndIntQueue.OrdAndValue();
|
||||
}
|
||||
reuse.ord = ord;
|
||||
reuse.value = value;
|
||||
reuse = q.insertWithOverflow(reuse);
|
||||
if (q.size() == topN) {
|
||||
bottomValue = q.top().value;
|
||||
bottomOrd = q.top().ord;
|
||||
}
|
||||
}
|
||||
}
|
||||
ord = siblings.get(ord);
|
||||
}
|
||||
}
|
||||
|
||||
if (dimConfig.multiValued) {
|
||||
if (dimConfig.requireDimCount) {
|
||||
aggregatedValue = getValue(pathOrd);
|
||||
} else {
|
||||
// Our sum'd value is not correct, in general:
|
||||
aggregatedValue = -1;
|
||||
}
|
||||
}
|
||||
|
||||
return new TopChildrenForPath(aggregatedValue, childCount, q);
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<FacetResult> getTopDims(int topNDims, int topNChildren) throws IOException {
|
||||
if (topNDims <= 0 || topNChildren <= 0) {
|
||||
throw new IllegalArgumentException("topN must be > 0");
|
||||
}
|
||||
|
||||
if (initialized == false) {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
|
||||
// get children and siblings ordinal array from TaxonomyFacets
|
||||
ParallelTaxonomyArrays.IntArray children = getChildren();
|
||||
ParallelTaxonomyArrays.IntArray siblings = getSiblings();
|
||||
|
||||
// Create priority queue to store top dimensions and sort by their aggregated values/hits and
|
||||
// string values.
|
||||
PriorityQueue<DimValue> pq =
|
||||
new PriorityQueue<>(topNDims) {
|
||||
@Override
|
||||
protected boolean lessThan(DimValue a, DimValue b) {
|
||||
if (a.value > b.value) {
|
||||
return false;
|
||||
} else if (a.value < b.value) {
|
||||
return true;
|
||||
} else {
|
||||
return a.dim.compareTo(b.dim) > 0;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Keep track of intermediate results, if we compute them, so we can reuse them later:
|
||||
Map<String, TopChildrenForPath> intermediateResults = null;
|
||||
|
||||
// iterate over children and siblings ordinals for all dims
|
||||
int ord = children.get(TaxonomyReader.ROOT_ORDINAL);
|
||||
while (ord != TaxonomyReader.INVALID_ORDINAL) {
|
||||
String dim = taxoReader.getPath(ord).components[0];
|
||||
FacetsConfig.DimConfig dimConfig = config.getDimConfig(dim);
|
||||
if (dimConfig.indexFieldName.equals(indexFieldName)) {
|
||||
FacetLabel cp = new FacetLabel(dim);
|
||||
int dimOrd = taxoReader.getOrdinal(cp);
|
||||
if (dimOrd != -1) {
|
||||
int dimValue;
|
||||
if (dimConfig.multiValued) {
|
||||
if (dimConfig.requireDimCount) {
|
||||
// If the dim is configured as multi-valued and requires dim counts, we can access
|
||||
// an accurate count for the dim computed at indexing time:
|
||||
dimValue = getValue(dimOrd);
|
||||
} else {
|
||||
// If the dim is configured as multi-valued but not requiring dim counts, we cannot
|
||||
// compute an accurate dim count, and use -1 as a place-holder:
|
||||
dimValue = -1;
|
||||
}
|
||||
} else {
|
||||
// Single-valued dims require aggregating descendant paths to get accurate dim counts
|
||||
// since we don't directly access ancestry paths:
|
||||
// TODO: We could consider indexing dim counts directly if getTopDims is a common
|
||||
// use-case.
|
||||
TopChildrenForPath topChildrenForPath =
|
||||
getTopChildrenForPath(dimConfig, dimOrd, topNChildren);
|
||||
if (intermediateResults == null) {
|
||||
intermediateResults = new HashMap<>();
|
||||
}
|
||||
intermediateResults.put(dim, topChildrenForPath);
|
||||
dimValue = topChildrenForPath.pathValue;
|
||||
}
|
||||
if (dimValue != 0) {
|
||||
if (pq.size() < topNDims) {
|
||||
pq.add(new DimValue(dim, dimOrd, dimValue));
|
||||
} else {
|
||||
if (dimValue > pq.top().value
|
||||
|| (dimValue == pq.top().value && dim.compareTo(pq.top().dim) < 0)) {
|
||||
DimValue bottomDim = pq.top();
|
||||
bottomDim.dim = dim;
|
||||
bottomDim.value = dimValue;
|
||||
pq.updateTop();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
ord = siblings.get(ord);
|
||||
}
|
||||
|
||||
FacetResult[] results = new FacetResult[pq.size()];
|
||||
|
||||
while (pq.size() > 0) {
|
||||
DimValue dimValue = pq.pop();
|
||||
assert dimValue != null;
|
||||
String dim = dimValue.dim;
|
||||
TopChildrenForPath topChildrenForPath = null;
|
||||
if (intermediateResults != null) {
|
||||
topChildrenForPath = intermediateResults.get(dim);
|
||||
}
|
||||
if (topChildrenForPath == null) {
|
||||
FacetsConfig.DimConfig dimConfig = config.getDimConfig(dim);
|
||||
topChildrenForPath = getTopChildrenForPath(dimConfig, dimValue.dimOrd, topNChildren);
|
||||
}
|
||||
FacetResult facetResult = createFacetResult(topChildrenForPath, dim);
|
||||
assert facetResult != null;
|
||||
results[pq.size()] = facetResult;
|
||||
}
|
||||
return Arrays.asList(results);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a FacetResult for the provided dim + path and intermediate results. Does the extra work
|
||||
* of resolving ordinals -> labels, etc. Will return null if there are no children.
|
||||
*/
|
||||
FacetResult createFacetResult(TopChildrenForPath topChildrenForPath, String dim, String... path)
|
||||
throws IOException {
|
||||
// If the intermediate result is null or there are no children, we return null:
|
||||
if (topChildrenForPath == null || topChildrenForPath.childCount == 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
TopOrdAndIntQueue q = topChildrenForPath.childQueue;
|
||||
assert q != null;
|
||||
|
||||
LabelAndValue[] labelValues = new LabelAndValue[q.size()];
|
||||
int[] ordinals = new int[labelValues.length];
|
||||
int[] values = new int[labelValues.length];
|
||||
|
||||
for (int i = labelValues.length - 1; i >= 0; i--) {
|
||||
TopOrdAndIntQueue.OrdAndValue ordAndValue = q.pop();
|
||||
assert ordAndValue != null;
|
||||
ordinals[i] = ordAndValue.ord;
|
||||
values[i] = ordAndValue.value;
|
||||
}
|
||||
|
||||
FacetLabel[] bulkPath = taxoReader.getBulkPath(ordinals);
|
||||
// The path component we're interested in is the one immediately after the provided path. We
|
||||
// add 1 here to also account for the dim:
|
||||
int childComponentIdx = path.length + 1;
|
||||
for (int i = 0; i < labelValues.length; i++) {
|
||||
labelValues[i] = new LabelAndValue(bulkPath[i].components[childComponentIdx], values[i]);
|
||||
}
|
||||
|
||||
return new FacetResult(
|
||||
dim, path, topChildrenForPath.pathValue, labelValues, topChildrenForPath.childCount);
|
||||
}
|
||||
|
||||
private static class DimValue {
|
||||
String dim;
|
||||
int dimOrd;
|
||||
int value;
|
||||
|
||||
DimValue(String dim, int dimOrd, int value) {
|
||||
this.dim = dim;
|
||||
this.dimOrd = dimOrd;
|
||||
public IntAggregatedValue(int value) {
|
||||
this.value = value;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void aggregate(int ord) {
|
||||
value = aggregationFunction.aggregate(value, getValue(ord));
|
||||
}
|
||||
|
||||
/** Intermediate result to store top children for a given path before resolving labels, etc. */
|
||||
private record TopChildrenForPath(int pathValue, int childCount, TopOrdAndIntQueue childQueue) {}
|
||||
@Override
|
||||
public Number get() {
|
||||
return value;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected AggregatedValue newAggregatedValue() {
|
||||
return new IntAggregatedValue(0);
|
||||
}
|
||||
}
|
||||
|
@ -152,8 +152,10 @@ public class TaxonomyFacetFloatAssociations extends FloatTaxonomyFacets {
|
||||
int ordinalCount = ordinalValues.docValueCount();
|
||||
for (int i = 0; i < ordinalCount; i++) {
|
||||
int ord = (int) ordinalValues.nextValue();
|
||||
float newValue = aggregationFunction.aggregate(values[ord], value);
|
||||
values[ord] = newValue;
|
||||
float currentValue = getValue(ord);
|
||||
float newValue = aggregationFunction.aggregate(currentValue, value);
|
||||
setValue(ord, newValue);
|
||||
setCount(ord, getCount(ord) + 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -188,8 +190,10 @@ public class TaxonomyFacetFloatAssociations extends FloatTaxonomyFacets {
|
||||
offset += 4;
|
||||
float value = (float) BitUtil.VH_BE_FLOAT.get(bytes, offset);
|
||||
offset += 4;
|
||||
float newValue = aggregationFunction.aggregate(values[ord], value);
|
||||
values[ord] = newValue;
|
||||
float currentValue = getValue(ord);
|
||||
float newValue = aggregationFunction.aggregate(currentValue, value);
|
||||
setValue(ord, newValue);
|
||||
setCount(ord, getCount(ord) + 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -85,6 +85,7 @@ public class TaxonomyFacetIntAssociations extends IntTaxonomyFacets {
|
||||
int currentValue = getValue(ord);
|
||||
int newValue = aggregationFunction.aggregate(currentValue, value);
|
||||
setValue(ord, newValue);
|
||||
setCount(ord, getCount(ord) + 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -17,20 +17,44 @@
|
||||
|
||||
package org.apache.lucene.facet.taxonomy;
|
||||
|
||||
import com.carrotsearch.hppc.IntArrayList;
|
||||
import com.carrotsearch.hppc.IntIntHashMap;
|
||||
import com.carrotsearch.hppc.cursors.IntIntCursor;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import org.apache.lucene.facet.FacetResult;
|
||||
import org.apache.lucene.facet.Facets;
|
||||
import org.apache.lucene.facet.FacetsCollector;
|
||||
import org.apache.lucene.facet.FacetsConfig;
|
||||
import org.apache.lucene.facet.FacetsConfig.DimConfig;
|
||||
import org.apache.lucene.facet.LabelAndValue;
|
||||
import org.apache.lucene.facet.TopOrdAndIntQueue;
|
||||
import org.apache.lucene.facet.TopOrdAndNumberQueue;
|
||||
import org.apache.lucene.util.PriorityQueue;
|
||||
|
||||
/** Base class for all taxonomy-based facets impls. */
|
||||
abstract class TaxonomyFacets extends Facets {
|
||||
/** Intermediate result to store top children for a given path before resolving labels, etc. */
|
||||
record TopChildrenForPath(Number pathValue, int childCount, TopOrdAndNumberQueue childQueue) {}
|
||||
|
||||
/**
 * Holder pairing a dimension name with its ordinal and a {@link Number} value. None of the
 * fields is final, so an instance can be overwritten in place.
 */
private static class DimValue {
|
||||
String dim;
|
||||
int dimOrd;
|
||||
Number value;
|
||||
|
||||
// Stores the three arguments in the matching fields; no validation is performed.
DimValue(String dim, int dimOrd, Number value) {
|
||||
this.dim = dim;
|
||||
this.dimOrd = dimOrd;
|
||||
this.value = value;
|
||||
}
|
||||
}
|
||||
|
||||
private static final Comparator<FacetResult> BY_VALUE_THEN_DIM =
|
||||
new Comparator<FacetResult>() {
|
||||
@ -67,6 +91,17 @@ abstract class TaxonomyFacets extends Facets {
|
||||
/** Maps an ordinal to its parent, or -1 if there is no parent (root node). */
|
||||
final ParallelTaxonomyArrays.IntArray parents;
|
||||
|
||||
/** Dense ordinal counts. */
|
||||
int[] counts;
|
||||
|
||||
/** Sparse ordinal counts. */
|
||||
IntIntHashMap sparseCounts;
|
||||
|
||||
/** Have value counters been initialized. */
|
||||
boolean initialized;
|
||||
|
||||
protected Comparator<Number> valueComparator;
|
||||
|
||||
/** Sole constructor. */
|
||||
TaxonomyFacets(
|
||||
String indexFieldName, TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc)
|
||||
@ -76,6 +111,78 @@ abstract class TaxonomyFacets extends Facets {
|
||||
this.config = config;
|
||||
this.fc = fc;
|
||||
parents = taxoReader.getParallelTaxonomyArrays().parents();
|
||||
valueComparator = Comparator.comparingInt((x) -> (int) x);
|
||||
}
|
||||
|
||||
/** Return true if a sparse hash table should be used for counting, instead of a dense int[]. */
|
||||
private boolean useHashTable(FacetsCollector fc, TaxonomyReader taxoReader) {
|
||||
if (taxoReader.getSize() < 1024) {
|
||||
// small number of unique values: use an array
|
||||
return false;
|
||||
}
|
||||
|
||||
if (fc == null) {
|
||||
// counting all docs: use an array
|
||||
return false;
|
||||
}
|
||||
|
||||
int maxDoc = 0;
|
||||
int sumTotalHits = 0;
|
||||
for (FacetsCollector.MatchingDocs docs : fc.getMatchingDocs()) {
|
||||
sumTotalHits += docs.totalHits;
|
||||
maxDoc += docs.context.reader().maxDoc();
|
||||
}
|
||||
|
||||
// if our result set is < 10% of the index, we collect sparsely (use hash map):
|
||||
return sumTotalHits < maxDoc / 10;
|
||||
}
|
||||
|
||||
protected void initializeValueCounters() {
|
||||
if (initialized) {
|
||||
return;
|
||||
}
|
||||
initialized = true;
|
||||
assert sparseCounts == null && counts == null;
|
||||
if (useHashTable(fc, taxoReader)) {
|
||||
sparseCounts = new IntIntHashMap();
|
||||
} else {
|
||||
counts = new int[taxoReader.getSize()];
|
||||
}
|
||||
}
|
||||
|
||||
/** Set the count for this ordinal to {@code newValue}. */
|
||||
protected void setCount(int ordinal, int newValue) {
|
||||
if (sparseCounts != null) {
|
||||
sparseCounts.put(ordinal, newValue);
|
||||
} else {
|
||||
counts[ordinal] = newValue;
|
||||
}
|
||||
}
|
||||
|
||||
/** Get the count for this ordinal. */
|
||||
protected int getCount(int ordinal) {
|
||||
if (sparseCounts != null) {
|
||||
return sparseCounts.get(ordinal);
|
||||
} else {
|
||||
return counts[ordinal];
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Get the aggregation value for this ordinal. This implementation returns the result of
 * {@code getCount(ordinal)}, boxed as a {@link Number}.
 */
|
||||
protected Number getAggregationValue(int ordinal) {
|
||||
// By default, this is just the count.
|
||||
return getCount(ordinal);
|
||||
}
|
||||
|
||||
/** Apply an aggregation to the two values and return the result. */
|
||||
protected Number aggregate(Number existingVal, Number newVal) {
|
||||
// By default, we are computing counts, so the values are interpreted as integers and summed.
|
||||
return (int) existingVal + (int) newVal;
|
||||
}
|
||||
|
||||
/** Were any values actually aggregated during counting? */
|
||||
boolean hasValues() {
|
||||
// Reports the initialized flag, which initializeValueCounters() sets to true on its first call.
return initialized;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -142,6 +249,320 @@ abstract class TaxonomyFacets extends Facets {
|
||||
return dimConfig;
|
||||
}
|
||||
|
||||
/**
|
||||
* Roll-up the aggregation values from {@code childOrdinal} to {@code ordinal}. Overrides should
|
||||
* probably call this to update the counts. Overriding allows us to work with primitive types for
|
||||
* the aggregation values, keeping aggregation efficient.
|
||||
*/
|
||||
protected void updateValueFromRollup(int ordinal, int childOrdinal) throws IOException {
|
||||
setCount(ordinal, getCount(ordinal) + rollup(childOrdinal));
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a {@link TopOrdAndNumberQueue} of the appropriate type, i.e. a {@link TopOrdAndIntQueue}
|
||||
* or a {@link org.apache.lucene.facet.TopOrdAndFloatQueue}.
|
||||
*/
|
||||
protected TopOrdAndNumberQueue makeTopOrdAndNumberQueue(int topN) {
|
||||
return new TopOrdAndIntQueue(Math.min(taxoReader.getSize(), topN));
|
||||
}
|
||||
|
||||
// TODO: We don't need this if we're okay with having an integer -1 in the results even for float
|
||||
// aggregations.
|
||||
/** Return the value for a missing aggregation, i.e. {@code -1} or {@code -1f}. */
|
||||
protected Number missingAggregationValue() {
|
||||
// This implementation always yields the integer -1 (autoboxed); it never produces -1f.
return -1;
|
||||
}
|
||||
|
||||
/** Rolls up any single-valued hierarchical dimensions. */
|
||||
void rollup() throws IOException {
|
||||
if (initialized == false) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Rollup any necessary dims:
|
||||
ParallelTaxonomyArrays.IntArray children = null;
|
||||
for (Map.Entry<String, FacetsConfig.DimConfig> ent : config.getDimConfigs().entrySet()) {
|
||||
String dim = ent.getKey();
|
||||
FacetsConfig.DimConfig ft = ent.getValue();
|
||||
if (ft.hierarchical && ft.multiValued == false) {
|
||||
int dimRootOrd = taxoReader.getOrdinal(new FacetLabel(dim));
|
||||
// It can be -1 if this field was declared in the
|
||||
// config but never indexed:
|
||||
if (dimRootOrd > 0) {
|
||||
if (children == null) {
|
||||
// lazy init
|
||||
children = getChildren();
|
||||
}
|
||||
updateValueFromRollup(dimRootOrd, children.get(dimRootOrd));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private int rollup(int ord) throws IOException {
|
||||
ParallelTaxonomyArrays.IntArray children = getChildren();
|
||||
ParallelTaxonomyArrays.IntArray siblings = getSiblings();
|
||||
int aggregatedValue = 0;
|
||||
while (ord != TaxonomyReader.INVALID_ORDINAL) {
|
||||
int currentValue = getCount(ord);
|
||||
int newValue = currentValue + rollup(children.get(ord));
|
||||
setCount(ord, newValue);
|
||||
aggregatedValue += getCount(ord);
|
||||
ord = siblings.get(ord);
|
||||
}
|
||||
return aggregatedValue;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a FacetResult for the provided dim + path and intermediate results. Does the extra work
|
||||
* of resolving ordinals -> labels, etc. Will return null if there are no children.
|
||||
*/
|
||||
private FacetResult createFacetResult(
|
||||
TopChildrenForPath topChildrenForPath, String dim, String... path) throws IOException {
|
||||
// If the intermediate result is null or there are no children, we return null:
|
||||
if (topChildrenForPath == null || topChildrenForPath.childCount == 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
TopOrdAndNumberQueue q = topChildrenForPath.childQueue;
|
||||
assert q != null;
|
||||
|
||||
LabelAndValue[] labelValues = new LabelAndValue[q.size()];
|
||||
int[] ordinals = new int[labelValues.length];
|
||||
Number[] values = new Number[labelValues.length];
|
||||
|
||||
for (int i = labelValues.length - 1; i >= 0; i--) {
|
||||
TopOrdAndNumberQueue.OrdAndValue ordAndValue = q.pop();
|
||||
assert ordAndValue != null;
|
||||
ordinals[i] = ordAndValue.ord;
|
||||
values[i] = ordAndValue.getValue();
|
||||
}
|
||||
|
||||
FacetLabel[] bulkPath = taxoReader.getBulkPath(ordinals);
|
||||
// The path component we're interested in is the one immediately after the provided path. We
|
||||
// add 1 here to also account for the dim:
|
||||
int childComponentIdx = path.length + 1;
|
||||
for (int i = 0; i < labelValues.length; i++) {
|
||||
labelValues[i] = new LabelAndValue(bulkPath[i].components[childComponentIdx], values[i]);
|
||||
}
|
||||
|
||||
return new FacetResult(
|
||||
dim, path, topChildrenForPath.pathValue, labelValues, topChildrenForPath.childCount);
|
||||
}
|
||||
|
||||
@Override
|
||||
public FacetResult getAllChildren(String dim, String... path) throws IOException {
|
||||
DimConfig dimConfig = verifyDim(dim);
|
||||
FacetLabel cp = new FacetLabel(dim, path);
|
||||
int dimOrd = taxoReader.getOrdinal(cp);
|
||||
if (dimOrd == -1) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (initialized == false) {
|
||||
return null;
|
||||
}
|
||||
|
||||
Number aggregatedValue = 0;
|
||||
int aggregatedCount = 0;
|
||||
|
||||
IntArrayList ordinals = new IntArrayList();
|
||||
List<Number> ordValues = new ArrayList<>();
|
||||
|
||||
if (sparseCounts != null) {
|
||||
for (IntIntCursor ordAndCount : sparseCounts) {
|
||||
int ord = ordAndCount.key;
|
||||
int count = ordAndCount.value;
|
||||
Number value = getAggregationValue(ord);
|
||||
if (parents.get(ord) == dimOrd && count > 0) {
|
||||
aggregatedCount += count;
|
||||
aggregatedValue = aggregate(aggregatedValue, value);
|
||||
ordinals.add(ord);
|
||||
ordValues.add(value);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
ParallelTaxonomyArrays.IntArray children = getChildren();
|
||||
ParallelTaxonomyArrays.IntArray siblings = getSiblings();
|
||||
int ord = children.get(dimOrd);
|
||||
while (ord != TaxonomyReader.INVALID_ORDINAL) {
|
||||
int count = counts[ord];
|
||||
Number value = getAggregationValue(ord);
|
||||
if (count > 0) {
|
||||
aggregatedCount += count;
|
||||
aggregatedValue = aggregate(aggregatedValue, value);
|
||||
ordinals.add(ord);
|
||||
ordValues.add(value);
|
||||
}
|
||||
ord = siblings.get(ord);
|
||||
}
|
||||
}
|
||||
|
||||
if (aggregatedCount == 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (dimConfig.multiValued) {
|
||||
if (dimConfig.requireDimCount) {
|
||||
aggregatedValue = getAggregationValue(dimOrd);
|
||||
} else {
|
||||
// Our aggregated value is not correct, in general:
|
||||
aggregatedValue = missingAggregationValue();
|
||||
}
|
||||
} else {
|
||||
// Our aggregateddim value is accurate, so we keep it
|
||||
}
|
||||
|
||||
// TODO: It would be nice if TaxonomyReader let us pass in a buffer + size so we didn't have to
|
||||
// do an array copy here:
|
||||
FacetLabel[] bulkPath = taxoReader.getBulkPath(ordinals.toArray());
|
||||
|
||||
LabelAndValue[] labelValues = new LabelAndValue[ordValues.size()];
|
||||
for (int i = 0; i < ordValues.size(); i++) {
|
||||
labelValues[i] = new LabelAndValue(bulkPath[i].components[cp.length], ordValues.get(i));
|
||||
}
|
||||
return new FacetResult(dim, path, aggregatedValue, labelValues, ordinals.size());
|
||||
}
|
||||
|
||||
protected void setIncomingValue(TopOrdAndNumberQueue.OrdAndValue incomingOrdAndValue, int ord) {
|
||||
((TopOrdAndIntQueue.OrdAndInt) incomingOrdAndValue).value = getCount(ord);
|
||||
}
|
||||
|
||||
protected TopOrdAndNumberQueue.OrdAndValue insertIntoQueue(
|
||||
TopOrdAndNumberQueue q, TopOrdAndNumberQueue.OrdAndValue incomingOrdAndValue, int ord) {
|
||||
if (incomingOrdAndValue == null) {
|
||||
incomingOrdAndValue = q.newOrdAndValue();
|
||||
}
|
||||
incomingOrdAndValue.ord = ord;
|
||||
setIncomingValue(incomingOrdAndValue, ord);
|
||||
|
||||
incomingOrdAndValue = q.insertWithOverflow(incomingOrdAndValue);
|
||||
return incomingOrdAndValue;
|
||||
}
|
||||
|
||||
protected abstract static class AggregatedValue {
|
||||
/** Aggregate the value corresponding to the given ordinal into this value. */
|
||||
public abstract void aggregate(int ord);
|
||||
|
||||
/** Retrieve the encapsulated value. */
|
||||
public abstract Number get();
|
||||
}
|
||||
|
||||
private class AggregatedCount extends AggregatedValue {
|
||||
private int count;
|
||||
|
||||
private AggregatedCount(int count) {
|
||||
this.count = count;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void aggregate(int ord) {
|
||||
count += getCount(ord);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Number get() {
|
||||
return count;
|
||||
}
|
||||
}
|
||||
|
||||
protected AggregatedValue newAggregatedValue() {
|
||||
return new AggregatedCount(0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Determine the top-n children for a specified dimension + path. Results are in an intermediate
|
||||
* form.
|
||||
*/
|
||||
protected TopChildrenForPath getTopChildrenForPath(DimConfig dimConfig, int pathOrd, int topN)
|
||||
throws IOException {
|
||||
TopOrdAndNumberQueue q = makeTopOrdAndNumberQueue(topN);
|
||||
|
||||
AggregatedValue aggregatedValue = newAggregatedValue();
|
||||
int childCount = 0;
|
||||
|
||||
TopOrdAndNumberQueue.OrdAndValue incomingOrdAndValue = null;
|
||||
|
||||
// TODO: would be faster if we had a "get the following children" API? then we
|
||||
// can make a single pass over the hashmap
|
||||
if (sparseCounts != null) {
|
||||
for (IntIntCursor c : sparseCounts) {
|
||||
int ord = c.key;
|
||||
int count = c.value;
|
||||
if (parents.get(ord) == pathOrd && count > 0) {
|
||||
aggregatedValue.aggregate(ord);
|
||||
childCount++;
|
||||
|
||||
incomingOrdAndValue = insertIntoQueue(q, incomingOrdAndValue, ord);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
ParallelTaxonomyArrays.IntArray children = getChildren();
|
||||
ParallelTaxonomyArrays.IntArray siblings = getSiblings();
|
||||
int ord = children.get(pathOrd);
|
||||
while (ord != TaxonomyReader.INVALID_ORDINAL) {
|
||||
int count = counts[ord];
|
||||
if (count > 0) {
|
||||
aggregatedValue.aggregate(ord);
|
||||
childCount++;
|
||||
|
||||
incomingOrdAndValue = insertIntoQueue(q, incomingOrdAndValue, ord);
|
||||
}
|
||||
ord = siblings.get(ord);
|
||||
}
|
||||
}
|
||||
|
||||
Number aggregatedValueNumber = aggregatedValue.get();
|
||||
if (dimConfig.multiValued) {
|
||||
if (dimConfig.requireDimCount) {
|
||||
aggregatedValueNumber = getAggregationValue(pathOrd);
|
||||
} else {
|
||||
// Our aggregated value is not correct, in general:
|
||||
aggregatedValueNumber = missingAggregationValue();
|
||||
}
|
||||
}
|
||||
|
||||
return new TopChildrenForPath(aggregatedValueNumber, childCount, q);
|
||||
}
|
||||
|
||||
@Override
|
||||
public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException {
|
||||
validateTopN(topN);
|
||||
DimConfig dimConfig = verifyDim(dim);
|
||||
FacetLabel cp = new FacetLabel(dim, path);
|
||||
int dimOrd = taxoReader.getOrdinal(cp);
|
||||
if (dimOrd == -1) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (initialized == false) {
|
||||
return null;
|
||||
}
|
||||
|
||||
TopChildrenForPath topChildrenForPath = getTopChildrenForPath(dimConfig, dimOrd, topN);
|
||||
return createFacetResult(topChildrenForPath, dim, path);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Number getSpecificValue(String dim, String... path) throws IOException {
|
||||
DimConfig dimConfig = verifyDim(dim);
|
||||
if (path.length == 0) {
|
||||
if (dimConfig.hierarchical && dimConfig.multiValued == false) {
|
||||
// ok: rolled up at search time
|
||||
} else if (dimConfig.requireDimCount && dimConfig.multiValued) {
|
||||
// ok: we indexed all ords at index time
|
||||
} else {
|
||||
throw new IllegalArgumentException(
|
||||
"cannot return dimension-level value alone; use getTopChildren instead");
|
||||
}
|
||||
}
|
||||
int ord = taxoReader.getOrdinal(new FacetLabel(dim, path));
|
||||
if (ord < 0) {
|
||||
return -1;
|
||||
}
|
||||
return initialized ? getAggregationValue(ord) : 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<FacetResult> getAllDims(int topN) throws IOException {
|
||||
validateTopN(topN);
|
||||
@ -171,6 +592,110 @@ abstract class TaxonomyFacets extends Facets {
|
||||
return results;
|
||||
}
|
||||
|
||||
/** Were any values actually aggregated during counting? */
|
||||
abstract boolean hasValues();
|
||||
@Override
|
||||
public List<FacetResult> getTopDims(int topNDims, int topNChildren) throws IOException {
|
||||
if (topNDims <= 0 || topNChildren <= 0) {
|
||||
throw new IllegalArgumentException("topN must be > 0");
|
||||
}
|
||||
|
||||
if (initialized == false) {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
|
||||
// get children and siblings ordinal array from TaxonomyFacets
|
||||
ParallelTaxonomyArrays.IntArray children = getChildren();
|
||||
ParallelTaxonomyArrays.IntArray siblings = getSiblings();
|
||||
|
||||
// Create priority queue to store top dimensions and sort by their aggregated values/hits and
|
||||
// string values.
|
||||
PriorityQueue<DimValue> pq =
|
||||
new PriorityQueue<>(topNDims) {
|
||||
@Override
|
||||
protected boolean lessThan(DimValue a, DimValue b) {
|
||||
int comparison = valueComparator.compare(a.value, b.value);
|
||||
if (comparison < 0) {
|
||||
return true;
|
||||
}
|
||||
if (comparison > 0) {
|
||||
return false;
|
||||
}
|
||||
return a.dim.compareTo(b.dim) > 0;
|
||||
}
|
||||
};
|
||||
|
||||
// Keep track of intermediate results, if we compute them, so we can reuse them later:
|
||||
Map<String, TopChildrenForPath> intermediateResults = null;
|
||||
|
||||
// iterate over children and siblings ordinals for all dims
|
||||
int ord = children.get(TaxonomyReader.ROOT_ORDINAL);
|
||||
while (ord != TaxonomyReader.INVALID_ORDINAL) {
|
||||
String dim = taxoReader.getPath(ord).components[0];
|
||||
FacetsConfig.DimConfig dimConfig = config.getDimConfig(dim);
|
||||
if (dimConfig.indexFieldName.equals(indexFieldName)) {
|
||||
FacetLabel cp = new FacetLabel(dim);
|
||||
int dimOrd = taxoReader.getOrdinal(cp);
|
||||
if (dimOrd != -1) {
|
||||
Number dimValue;
|
||||
if (dimConfig.multiValued) {
|
||||
if (dimConfig.requireDimCount) {
|
||||
// If the dim is configured as multi-valued and requires dim counts, we can access
|
||||
// an accurate count for the dim computed at indexing time:
|
||||
dimValue = getAggregationValue(dimOrd);
|
||||
} else {
|
||||
// If the dim is configured as multi-valued but not requiring dim counts, we cannot
|
||||
// compute an accurate dim count, and use -1 as a place-holder:
|
||||
dimValue = -1;
|
||||
}
|
||||
} else {
|
||||
// Single-valued dims require aggregating descendant paths to get accurate dim counts
|
||||
// since we don't directly access ancestry paths:
|
||||
// TODO: We could consider indexing dim counts directly if getTopDims is a common
|
||||
// use-case.
|
||||
TopChildrenForPath topChildrenForPath =
|
||||
getTopChildrenForPath(dimConfig, dimOrd, topNChildren);
|
||||
if (intermediateResults == null) {
|
||||
intermediateResults = new HashMap<>();
|
||||
}
|
||||
intermediateResults.put(dim, topChildrenForPath);
|
||||
dimValue = topChildrenForPath.pathValue();
|
||||
}
|
||||
if (valueComparator.compare(dimValue, 0) != 0) {
|
||||
if (pq.size() < topNDims) {
|
||||
pq.add(new DimValue(dim, dimOrd, dimValue));
|
||||
} else {
|
||||
if (valueComparator.compare(dimValue, pq.top().value) > 0
|
||||
|| (valueComparator.compare(dimValue, pq.top().value) == 0
|
||||
&& dim.compareTo(pq.top().dim) < 0)) {
|
||||
DimValue bottomDim = pq.top();
|
||||
bottomDim.dim = dim;
|
||||
bottomDim.value = dimValue;
|
||||
pq.updateTop();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
ord = siblings.get(ord);
|
||||
}
|
||||
|
||||
FacetResult[] results = new FacetResult[pq.size()];
|
||||
|
||||
while (pq.size() > 0) {
|
||||
DimValue dimValue = pq.pop();
|
||||
assert dimValue != null;
|
||||
String dim = dimValue.dim;
|
||||
TopChildrenForPath topChildrenForPath = null;
|
||||
if (intermediateResults != null) {
|
||||
topChildrenForPath = intermediateResults.get(dim);
|
||||
}
|
||||
if (topChildrenForPath == null) {
|
||||
FacetsConfig.DimConfig dimConfig = config.getDimConfig(dim);
|
||||
topChildrenForPath = getTopChildrenForPath(dimConfig, dimValue.dimOrd, topNChildren);
|
||||
}
|
||||
FacetResult facetResult = createFacetResult(topChildrenForPath, dim);
|
||||
assert facetResult != null;
|
||||
results[pq.size()] = facetResult;
|
||||
}
|
||||
return Arrays.asList(results);
|
||||
}
|
||||
}
|
||||
|
@ -24,7 +24,10 @@ import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.facet.DrillDownQuery;
|
||||
import org.apache.lucene.facet.FacetField;
|
||||
import org.apache.lucene.facet.FacetResult;
|
||||
import org.apache.lucene.facet.FacetTestCase;
|
||||
import org.apache.lucene.facet.Facets;
|
||||
@ -38,9 +41,12 @@ import org.apache.lucene.index.BinaryDocValues;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.tests.index.RandomIndexWriter;
|
||||
import org.apache.lucene.util.BitUtil;
|
||||
@ -100,6 +106,7 @@ public class TestTaxonomyFacetAssociations extends FacetTestCase {
|
||||
doc.add(new FloatAssociationFacetField(0.2f, "float", "b"));
|
||||
}
|
||||
}
|
||||
doc.add(new TextField("match", "yes", Field.Store.NO));
|
||||
writer.addDocument(config.build(taxoWriter, doc));
|
||||
}
|
||||
|
||||
@ -141,6 +148,17 @@ public class TestTaxonomyFacetAssociations extends FacetTestCase {
|
||||
}
|
||||
}
|
||||
|
||||
doc.add(new TextField("match", "yes", Field.Store.NO));
|
||||
writer.addDocument(config.build(taxoWriter, doc));
|
||||
}
|
||||
|
||||
// Add more random labels and documents to randomly make the test run on sparse/dense
|
||||
// aggregation values.
|
||||
count = random().nextInt(10_000);
|
||||
for (int i = 0; i < count; i++) {
|
||||
Document doc = new Document();
|
||||
doc.add(new FacetField("random_dim_" + i, "path"));
|
||||
doc.add(new TextField("match", "no", Field.Store.NO));
|
||||
writer.addDocument(config.build(taxoWriter, doc));
|
||||
}
|
||||
|
||||
@ -193,7 +211,8 @@ public class TestTaxonomyFacetAssociations extends FacetTestCase {
|
||||
public void testIntSumAssociation() throws Exception {
|
||||
|
||||
IndexSearcher searcher = newSearcher(reader);
|
||||
FacetsCollector fc = searcher.search(new MatchAllDocsQuery(), new FacetsCollectorManager());
|
||||
FacetsCollector fc =
|
||||
searcher.search(new TermQuery(new Term("match", "yes")), new FacetsCollectorManager());
|
||||
|
||||
Facets facets =
|
||||
new TaxonomyFacetIntAssociations(
|
||||
@ -226,7 +245,7 @@ public class TestTaxonomyFacetAssociations extends FacetTestCase {
|
||||
FacetsCollector fc = new FacetsCollector();
|
||||
|
||||
IndexSearcher searcher = newSearcher(reader);
|
||||
searcher.search(new MatchAllDocsQuery(), fc);
|
||||
searcher.search(new TermQuery(new Term("match", "yes")), fc);
|
||||
|
||||
Map<String, Integer> expected;
|
||||
Facets facets;
|
||||
@ -274,7 +293,8 @@ public class TestTaxonomyFacetAssociations extends FacetTestCase {
|
||||
|
||||
public void testFloatSumAssociation() throws Exception {
|
||||
IndexSearcher searcher = newSearcher(reader);
|
||||
FacetsCollector fc = searcher.search(new MatchAllDocsQuery(), new FacetsCollectorManager());
|
||||
FacetsCollector fc =
|
||||
searcher.search(new TermQuery(new Term("match", "yes")), new FacetsCollectorManager());
|
||||
|
||||
Facets facets =
|
||||
new TaxonomyFacetFloatAssociations(
|
||||
@ -288,7 +308,7 @@ public class TestTaxonomyFacetAssociations extends FacetTestCase {
|
||||
"float",
|
||||
new String[0],
|
||||
2,
|
||||
-1.0f,
|
||||
-1f,
|
||||
new LabelAndValue[] {
|
||||
new LabelAndValue("a", 50.0f), new LabelAndValue("b", 9.999995f),
|
||||
});
|
||||
@ -307,7 +327,7 @@ public class TestTaxonomyFacetAssociations extends FacetTestCase {
|
||||
// test getAllDims and getTopDims
|
||||
List<FacetResult> topDims = facets.getTopDims(10, 10);
|
||||
List<FacetResult> allDims = facets.getAllDims(10);
|
||||
assertEquals(topDims, allDims);
|
||||
assertFloatFacetResultsEqual(topDims, allDims);
|
||||
}
|
||||
|
||||
public void testFloatAssociationRandom() throws Exception {
|
||||
@ -315,7 +335,7 @@ public class TestTaxonomyFacetAssociations extends FacetTestCase {
|
||||
FacetsCollector fc = new FacetsCollector();
|
||||
|
||||
IndexSearcher searcher = newSearcher(reader);
|
||||
searcher.search(new MatchAllDocsQuery(), fc);
|
||||
searcher.search(new TermQuery(new Term("match", "yes")), fc);
|
||||
|
||||
Map<String, Float> expected;
|
||||
Facets facets;
|
||||
@ -339,7 +359,7 @@ public class TestTaxonomyFacetAssociations extends FacetTestCase {
|
||||
// test getAllDims and getTopDims
|
||||
List<FacetResult> topDims = facets.getTopDims(10, 10);
|
||||
List<FacetResult> allDims = facets.getAllDims(10);
|
||||
assertEquals(topDims, allDims);
|
||||
assertFloatFacetResultsEqual(topDims, allDims);
|
||||
|
||||
// MAX:
|
||||
facets =
|
||||
@ -360,7 +380,7 @@ public class TestTaxonomyFacetAssociations extends FacetTestCase {
|
||||
// test getAllDims and getTopDims
|
||||
topDims = facets.getTopDims(10, 10);
|
||||
allDims = facets.getAllDims(10);
|
||||
assertEquals(topDims, allDims);
|
||||
assertFloatFacetResultsEqual(topDims, allDims);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -369,7 +389,8 @@ public class TestTaxonomyFacetAssociations extends FacetTestCase {
|
||||
*/
|
||||
public void testIntAndFloatAssocation() throws Exception {
|
||||
IndexSearcher searcher = newSearcher(reader);
|
||||
FacetsCollector fc = searcher.search(new MatchAllDocsQuery(), new FacetsCollectorManager());
|
||||
FacetsCollector fc =
|
||||
searcher.search(new TermQuery(new Term("match", "yes")), new FacetsCollectorManager());
|
||||
|
||||
Facets facets =
|
||||
new TaxonomyFacetFloatAssociations(
|
||||
@ -396,7 +417,8 @@ public class TestTaxonomyFacetAssociations extends FacetTestCase {
|
||||
|
||||
public void testWrongIndexFieldName() throws Exception {
|
||||
IndexSearcher searcher = newSearcher(reader);
|
||||
FacetsCollector fc = searcher.search(new MatchAllDocsQuery(), new FacetsCollectorManager());
|
||||
FacetsCollector fc =
|
||||
searcher.search(new TermQuery(new Term("match", "yes")), new FacetsCollectorManager());
|
||||
Facets facets =
|
||||
new TaxonomyFacetFloatAssociations(
|
||||
"wrong_field", taxoReader, config, fc, AssociationAggregationFunction.SUM);
|
||||
@ -514,6 +536,63 @@ public class TestTaxonomyFacetAssociations extends FacetTestCase {
|
||||
"Wrong count for category 'b'!", 150, facets.getSpecificValue("int", "b").intValue());
|
||||
}
|
||||
|
||||
public void testNonPositiveAggregations() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
Directory taxoDir = newDirectory();
|
||||
|
||||
TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
|
||||
|
||||
FacetsConfig config = new FacetsConfig();
|
||||
config.setIndexFieldName("a", "$float_facets");
|
||||
config.setIndexFieldName("b", "$int_facets");
|
||||
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
|
||||
Document d;
|
||||
|
||||
d = new Document();
|
||||
// Positive association
|
||||
d.add(new FloatAssociationFacetField(1f, "a", "1"));
|
||||
d.add(new IntAssociationFacetField(1, "b", "1"));
|
||||
writer.addDocument(config.build(taxoWriter, d));
|
||||
|
||||
d = new Document();
|
||||
// Zero association
|
||||
d.add(new FloatAssociationFacetField(0f, "a", "2"));
|
||||
d.add(new IntAssociationFacetField(0, "b", "2"));
|
||||
writer.addDocument(config.build(taxoWriter, d));
|
||||
|
||||
d = new Document();
|
||||
// Negative association
|
||||
d.add(new FloatAssociationFacetField(-1f, "a", "3"));
|
||||
d.add(new IntAssociationFacetField(-1, "b", "3"));
|
||||
writer.addDocument(config.build(taxoWriter, d));
|
||||
|
||||
IndexReader reader = writer.getReader();
|
||||
IOUtils.close(taxoWriter, writer);
|
||||
|
||||
IndexSearcher searcher = newSearcher(reader);
|
||||
Query q = new MatchAllDocsQuery();
|
||||
FacetsCollector fc = searcher.search(q, new FacetsCollectorManager());
|
||||
|
||||
TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
|
||||
FloatTaxonomyFacets floatFacets =
|
||||
new TaxonomyFacetFloatAssociations(
|
||||
"$float_facets", taxoReader, config, fc, AssociationAggregationFunction.SUM);
|
||||
IntTaxonomyFacets intFacets =
|
||||
new TaxonomyFacetIntAssociations(
|
||||
"$int_facets", taxoReader, config, fc, AssociationAggregationFunction.SUM);
|
||||
|
||||
// "2" and "3" are included in the result despite having non-positive values associated to them.
|
||||
assertEquals(
|
||||
"dim=a path=[] value=0.0 childCount=3\n 1 (1.0)\n 2 (0.0)\n 3 (-1.0)\n",
|
||||
floatFacets.getTopChildren(10, "a").toString());
|
||||
assertEquals(
|
||||
"dim=b path=[] value=0 childCount=3\n 1 (1)\n 2 (0)\n 3 (-1)\n",
|
||||
intFacets.getTopChildren(10, "b").toString());
|
||||
|
||||
IOUtils.close(taxoReader, reader, taxoDir, dir);
|
||||
}
|
||||
|
||||
private void validateInts(
|
||||
String dim,
|
||||
Map<String, Integer> expected,
|
||||
@ -589,6 +668,19 @@ public class TestTaxonomyFacetAssociations extends FacetTestCase {
|
||||
}
|
||||
}
|
||||
|
||||
private void assertFloatFacetResultsEqual(List<FacetResult> expected, List<FacetResult> actual) {
|
||||
assertEquals(expected.size(), actual.size());
|
||||
for (int i = 0; i < expected.size(); i++) {
|
||||
FacetResult expectedResult = expected.get(i);
|
||||
FacetResult actualResult = actual.get(i);
|
||||
|
||||
assertEquals(expectedResult.dim, actualResult.dim);
|
||||
assertArrayEquals(expectedResult.path, actualResult.path);
|
||||
assertEquals((float) expectedResult.value, (float) actualResult.value, 2e-1);
|
||||
assertEquals(expectedResult.childCount, actualResult.childCount);
|
||||
}
|
||||
}
|
||||
|
||||
// since we have no insight into the ordinals assigned to the values, we sort labels by value and
|
||||
// count in
|
||||
// ascending order in order to compare with expected results
|
||||
|
@ -504,7 +504,9 @@ public class TestTaxonomyFacetValueSource extends FacetTestCase {
|
||||
}
|
||||
|
||||
// LUCENE-10495
|
||||
public void testSiblingsLoaded() throws Exception {
|
||||
public void testChildrenAndSiblingsLoaded() throws Exception {
|
||||
boolean[] shouldLoad = new boolean[] {false, true};
|
||||
for (boolean load : shouldLoad) {
|
||||
Directory indexDir = newDirectory();
|
||||
Directory taxoDir = newDirectory();
|
||||
|
||||
@ -513,11 +515,11 @@ public class TestTaxonomyFacetValueSource extends FacetTestCase {
|
||||
FacetsConfig config = new FacetsConfig();
|
||||
|
||||
config.setHierarchical("a", true);
|
||||
config.setMultiValued("a", true);
|
||||
config.setMultiValued("a", load == false);
|
||||
config.setRequireDimCount("a", true);
|
||||
|
||||
Document doc = new Document();
|
||||
doc.add(new FacetField("a", Integer.toString(2), "1"));
|
||||
doc.add(new FacetField("a", "1", "2"));
|
||||
iw.addDocument(config.build(taxoWriter, doc));
|
||||
|
||||
DirectoryReader r = DirectoryReader.open(iw);
|
||||
@ -526,8 +528,7 @@ public class TestTaxonomyFacetValueSource extends FacetTestCase {
|
||||
FacetsCollector sfc =
|
||||
newSearcher(r).search(new MatchAllDocsQuery(), new FacetsCollectorManager());
|
||||
|
||||
// Test MAX:
|
||||
Facets facets =
|
||||
TaxonomyFacets facets =
|
||||
new TaxonomyFacetFloatAssociations(
|
||||
taxoReader,
|
||||
config,
|
||||
@ -535,23 +536,13 @@ public class TestTaxonomyFacetValueSource extends FacetTestCase {
|
||||
AssociationAggregationFunction.MAX,
|
||||
DoubleValuesSource.fromLongField("price"));
|
||||
|
||||
assertTrue(((TaxonomyFacets) facets).childrenLoaded());
|
||||
assertFalse(((TaxonomyFacets) facets).siblingsLoaded());
|
||||
|
||||
// Test SUM:
|
||||
facets =
|
||||
new TaxonomyFacetFloatAssociations(
|
||||
taxoReader,
|
||||
config,
|
||||
sfc,
|
||||
AssociationAggregationFunction.SUM,
|
||||
DoubleValuesSource.fromLongField("price"));
|
||||
assertTrue(((TaxonomyFacets) facets).childrenLoaded());
|
||||
assertFalse(((TaxonomyFacets) facets).siblingsLoaded());
|
||||
assertEquals(load, facets.childrenLoaded());
|
||||
assertEquals(load, facets.siblingsLoaded());
|
||||
|
||||
iw.close();
|
||||
IOUtils.close(taxoWriter, taxoReader, taxoDir, r, indexDir);
|
||||
}
|
||||
}
|
||||
|
||||
public void testCountAndSumScore() throws Exception {
|
||||
Directory indexDir = newDirectory();
|
||||
|
Loading…
x
Reference in New Issue
Block a user