Initialize facet counting data structures lazily (#12408)

This change covers:
* Taxonomy faceting:
  * FastTaxonomyFacetCounts
  * TaxonomyFacetIntAssociations
  * TaxonomyFacetFloatAssociations
* SSDV faceting:
  * SortedSetDocValuesFacetCounts
  * ConcurrentSortedSetDocValuesFacetCounts
  * StringValueFacetCounts
* Range faceting:
  * LongRangeFacetCounts
  * DoubleRangeFacetCounts
* Long faceting:
  * LongValueFacetCounts

Left for a future iteration:
* RangeOnRange faceting
* FacetSet faceting
This commit is contained in:
Greg Miller 2023-07-25 12:20:42 -07:00 committed by GitHub
parent 2b3b028734
commit 179b45bc23
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 366 additions and 85 deletions

View File

@ -161,6 +161,9 @@ Optimizations
* GITHUB#12453: Faster bulk numeric reads from BufferedIndexInput (Armin Braun)
* GITHUB#12408: Lazy initialization improvements for Facets implementations when there are segments with no hits
to count. (Greg Miller)
Bug Fixes
---------------------

View File

@ -51,10 +51,13 @@ import org.apache.lucene.util.PriorityQueue;
public class LongValueFacetCounts extends Facets {
/** Used for all values that are < 1K. */
private final int[] counts = new int[1024];
private int[] counts;
/** Used for all values that are >= 1K. */
private final LongIntHashMap hashCounts = new LongIntHashMap();
private LongIntHashMap hashCounts;
/** Whether-or-not counters have been initialized. */
private boolean initialized;
/** Field being counted. */
private final String field;
@ -125,6 +128,7 @@ public class LongValueFacetCounts extends Facets {
public LongValueFacetCounts(String field, LongValuesSource valueSource, IndexReader reader)
throws IOException {
this.field = field;
initializeCounters();
if (valueSource != null) {
countAll(reader, valueSource);
} else {
@ -141,6 +145,7 @@ public class LongValueFacetCounts extends Facets {
public LongValueFacetCounts(String field, MultiLongValuesSource valuesSource, IndexReader reader)
throws IOException {
this.field = field;
initializeCounters();
if (valuesSource != null) {
LongValuesSource singleValued = MultiLongValuesSource.unwrapSingleton(valuesSource);
if (singleValued != null) {
@ -153,11 +158,25 @@ public class LongValueFacetCounts extends Facets {
}
}
private void initializeCounters() {
if (initialized) {
return;
}
assert counts == null && hashCounts == null;
initialized = true;
counts = new int[1024];
hashCounts = new LongIntHashMap();
}
/** Counts from the provided valueSource. */
private void count(LongValuesSource valueSource, List<MatchingDocs> matchingDocs)
throws IOException {
for (MatchingDocs hits : matchingDocs) {
if (hits.totalHits == 0) {
continue;
}
initializeCounters();
LongValues fv = valueSource.getValues(hits.context, null);
@ -183,6 +202,10 @@ public class LongValueFacetCounts extends Facets {
private void count(MultiLongValuesSource valuesSource, List<MatchingDocs> matchingDocs)
throws IOException {
for (MatchingDocs hits : matchingDocs) {
if (hits.totalHits == 0) {
continue;
}
initializeCounters();
MultiLongValues multiValues = valuesSource.getValues(hits.context);
@ -213,6 +236,10 @@ public class LongValueFacetCounts extends Facets {
/** Counts from the field's indexed doc values. */
private void count(String field, List<MatchingDocs> matchingDocs) throws IOException {
for (MatchingDocs hits : matchingDocs) {
if (hits.totalHits == 0) {
continue;
}
initializeCounters();
SortedNumericDocValues multiValues = DocValues.getSortedNumeric(hits.context.reader(), field);
NumericDocValues singleValues = DocValues.unwrapSingleton(multiValues);
@ -350,6 +377,13 @@ public class LongValueFacetCounts extends Facets {
@Override
public FacetResult getAllChildren(String dim, String... path) throws IOException {
validateDimAndPathForGetChildren(dim, path);
if (initialized == false) {
// nothing was counted (either no hits or no values for all hits):
assert totCount == 0;
return new FacetResult(field, new String[0], totCount, new LabelAndValue[0], 0);
}
List<LabelAndValue> labelValues = new ArrayList<>();
for (int i = 0; i < counts.length; i++) {
if (counts[i] != 0) {
@ -378,6 +412,12 @@ public class LongValueFacetCounts extends Facets {
validateTopN(topN);
validateDimAndPathForGetChildren(dim, path);
if (initialized == false) {
// nothing was counted (either no hits or no values for all hits):
assert totCount == 0;
return new FacetResult(field, new String[0], totCount, new LabelAndValue[0], 0);
}
PriorityQueue<Entry> pq =
new PriorityQueue<>(Math.min(topN, counts.length + hashCounts.size())) {
@Override
@ -440,6 +480,12 @@ public class LongValueFacetCounts extends Facets {
* efficient to use {@link #getAllChildren(String, String...)}.
*/
public FacetResult getAllChildrenSortByValue() {
if (initialized == false) {
// nothing was counted (either no hits or no values for all hits):
assert totCount == 0;
return new FacetResult(field, new String[0], totCount, new LabelAndValue[0], 0);
}
List<LabelAndValue> labelValues = new ArrayList<>();
// compact & sort hash table's arrays by value
@ -533,27 +579,29 @@ public class LongValueFacetCounts extends Facets {
StringBuilder b = new StringBuilder();
b.append("LongValueFacetCounts totCount=");
b.append(totCount);
b.append(":\n");
for (int i = 0; i < counts.length; i++) {
if (counts[i] != 0) {
b.append(" ");
b.append(i);
b.append(" -> count=");
b.append(counts[i]);
b.append('\n');
}
}
if (hashCounts.size() != 0) {
for (LongIntCursor c : hashCounts) {
if (c.value != 0) {
if (initialized) {
b.append(":\n");
for (int i = 0; i < counts.length; i++) {
if (counts[i] != 0) {
b.append(" ");
b.append(c.key);
b.append(i);
b.append(" -> count=");
b.append(c.value);
b.append(counts[i]);
b.append('\n');
}
}
if (hashCounts.size() != 0) {
for (LongIntCursor c : hashCounts) {
if (c.value != 0) {
b.append(" ");
b.append(c.key);
b.append(" -> count=");
b.append(c.value);
b.append('\n');
}
}
}
}
return b.toString();

View File

@ -69,8 +69,9 @@ public class StringValueFacetCounts extends Facets {
private final OrdinalMap ordinalMap;
private final SortedSetDocValues docValues;
private final int[] denseCounts;
private int[] denseCounts;
private final IntIntHashMap sparseCounts;
private boolean initialized;
private final int cardinality;
private int totalDocCount;
@ -101,7 +102,9 @@ public class StringValueFacetCounts extends Facets {
if (facetsCollector != null) {
if (cardinality < 1024) { // count densely for low cardinality
sparseCounts = null;
denseCounts = new int[cardinality];
denseCounts = null;
initialized = false;
count(facetsCollector);
} else {
int totalHits = 0;
int totalDocs = 0;
@ -110,22 +113,31 @@ public class StringValueFacetCounts extends Facets {
totalDocs += matchingDocs.context.reader().maxDoc();
}
// If our result set is < 10% of the index, we collect sparsely (use hash map). This
// heuristic is borrowed from IntTaxonomyFacetCounts:
if (totalHits < totalDocs / 10) {
sparseCounts = new IntIntHashMap();
denseCounts = null;
} else {
// No counting needed if there are no hits:
if (totalHits == 0) {
sparseCounts = null;
denseCounts = new int[cardinality];
denseCounts = null;
initialized = true;
} else {
// If our result set is < 10% of the index, we collect sparsely (use hash map). This
// heuristic is borrowed from IntTaxonomyFacetCounts:
if (totalHits < totalDocs / 10) {
sparseCounts = new IntIntHashMap();
denseCounts = null;
initialized = true;
} else {
sparseCounts = null;
denseCounts = new int[cardinality];
initialized = true;
}
count(facetsCollector);
}
}
count(facetsCollector);
} else {
// Since we're counting all ordinals, count densely:
sparseCounts = null;
denseCounts = new int[cardinality];
initialized = true;
countAll();
}
@ -294,6 +306,9 @@ public class StringValueFacetCounts extends Facets {
if (matchingDocs.size() == 1) {
FacetsCollector.MatchingDocs hits = matchingDocs.get(0);
if (hits.totalHits == 0) {
return;
}
// Validate state before doing anything else:
validateState(hits.context);
@ -314,6 +329,10 @@ public class StringValueFacetCounts extends Facets {
assert ordinalMap != null;
assert docValues instanceof MultiDocValues.MultiSortedSetDocValues;
if (hits.totalHits == 0) {
continue;
}
MultiDocValues.MultiSortedSetDocValues multiValues =
(MultiDocValues.MultiSortedSetDocValues) docValues;
@ -368,6 +387,13 @@ public class StringValueFacetCounts extends Facets {
FacetsCollector.MatchingDocs hits,
Bits liveDocs)
throws IOException {
if (initialized == false) {
assert denseCounts == null && sparseCounts == null;
// If the counters weren't initialized, we can assume the cardinality is low enough that
// dense counting will be preferable:
denseCounts = new int[cardinality];
initialized = true;
}
// It's slightly more efficient to work against SortedDocValues if the field is actually
// single-valued (see: LUCENE-5309)

View File

@ -157,20 +157,25 @@ public class DoubleRangeFacetCounts extends RangeFacetCounts {
private void count(DoubleValuesSource valueSource, List<MatchingDocs> matchingDocs)
throws IOException {
LongRange[] longRanges = getLongRanges();
LongRangeCounter counter = LongRangeCounter.create(longRanges, counts);
LongRangeCounter counter = null;
int missingCount = 0;
for (MatchingDocs hits : matchingDocs) {
DoubleValues fv = valueSource.getValues(hits.context, null);
totCount += hits.totalHits;
if (hits.totalHits == 0) {
continue;
}
final DocIdSetIterator it = createIterator(hits);
if (it == null) {
continue;
}
if (counter == null) {
counter = setupCounter();
}
DoubleValues fv = valueSource.getValues(hits.context, null);
totCount += hits.totalHits;
for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; ) {
// Skip missing docs:
if (fv.advanceExact(doc)) {
@ -183,27 +188,34 @@ public class DoubleRangeFacetCounts extends RangeFacetCounts {
}
}
missingCount += counter.finish();
totCount -= missingCount;
if (counter != null) {
missingCount += counter.finish();
totCount -= missingCount;
}
}
/** Counts from the provided valueSource. */
private void count(MultiDoubleValuesSource valueSource, List<MatchingDocs> matchingDocs)
throws IOException {
LongRange[] longRanges = getLongRanges();
LongRangeCounter counter = LongRangeCounter.create(longRanges, counts);
LongRangeCounter counter = null;
int missingCount = 0;
for (MatchingDocs hits : matchingDocs) {
MultiDoubleValues multiValues = valueSource.getValues(hits.context);
if (hits.totalHits == 0) {
continue;
}
final DocIdSetIterator it = createIterator(hits);
if (it == null) {
continue;
}
if (counter == null) {
counter = setupCounter();
}
MultiDoubleValues multiValues = valueSource.getValues(hits.context);
for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; ) {
// Skip missing docs:
if (multiValues.advanceExact(doc)) {
@ -232,8 +244,10 @@ public class DoubleRangeFacetCounts extends RangeFacetCounts {
}
}
missingCount += counter.finish();
totCount -= missingCount;
if (counter != null) {
missingCount += counter.finish();
totCount -= missingCount;
}
}
/** Create long ranges from the double ranges. */

View File

@ -128,21 +128,27 @@ public class LongRangeFacetCounts extends RangeFacetCounts {
private void count(LongValuesSource valueSource, List<MatchingDocs> matchingDocs)
throws IOException {
LongRange[] ranges = getLongRanges();
LongRangeCounter counter = LongRangeCounter.create(ranges, counts);
LongRangeCounter counter = null;
int missingCount = 0;
for (MatchingDocs hits : matchingDocs) {
LongValues fv = valueSource.getValues(hits.context, null);
totCount += hits.totalHits;
if (hits.totalHits == 0) {
continue;
}
final DocIdSetIterator it = createIterator(hits);
if (it == null) {
continue;
}
if (counter == null) {
counter = setupCounter();
}
LongValues fv = valueSource.getValues(hits.context, null);
totCount += hits.totalHits;
for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; ) {
// Skip missing docs:
if (fv.advanceExact(doc)) {
@ -155,26 +161,34 @@ public class LongRangeFacetCounts extends RangeFacetCounts {
}
}
missingCount += counter.finish();
totCount -= missingCount;
if (counter != null) {
missingCount += counter.finish();
totCount -= missingCount;
}
}
/** Counts from the provided valueSource. */
private void count(MultiLongValuesSource valueSource, List<MatchingDocs> matchingDocs)
throws IOException {
LongRange[] ranges = getLongRanges();
LongRangeCounter counter = LongRangeCounter.create(ranges, counts);
LongRangeCounter counter = null;
for (MatchingDocs hits : matchingDocs) {
MultiLongValues multiValues = valueSource.getValues(hits.context);
if (hits.totalHits == 0) {
continue;
}
final DocIdSetIterator it = createIterator(hits);
if (it == null) {
continue;
}
if (counter == null) {
counter = setupCounter();
}
MultiLongValues multiValues = valueSource.getValues(hits.context);
for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; ) {
// Skip missing docs:
if (multiValues.advanceExact(doc)) {
@ -203,8 +217,10 @@ public class LongRangeFacetCounts extends RangeFacetCounts {
}
}
int missingCount = counter.finish();
totCount -= missingCount;
if (counter != null) {
int missingCount = counter.finish();
totCount -= missingCount;
}
}
@Override

View File

@ -39,8 +39,8 @@ abstract class RangeFacetCounts extends FacetCountsWithFilterQuery {
/** Ranges passed to constructor. */
protected final Range[] ranges;
/** Counts, initialized in by subclass. */
protected final int[] counts;
/** Counts. */
protected int[] counts;
/** Our field name. */
protected final String field;
@ -53,7 +53,6 @@ abstract class RangeFacetCounts extends FacetCountsWithFilterQuery {
super(fastMatchQuery);
this.field = field;
this.ranges = ranges;
counts = new int[ranges.length];
}
protected abstract LongRange[] getLongRanges();
@ -62,6 +61,12 @@ abstract class RangeFacetCounts extends FacetCountsWithFilterQuery {
return l;
}
protected LongRangeCounter setupCounter() {
assert counts == null;
counts = new int[ranges.length];
return LongRangeCounter.create(getLongRanges(), counts);
}
/** Counts from the provided field. */
protected void count(String field, List<FacetsCollector.MatchingDocs> matchingDocs)
throws IOException {
@ -69,15 +74,20 @@ abstract class RangeFacetCounts extends FacetCountsWithFilterQuery {
// load doc values for all segments up front and keep track of whether-or-not we found any that
// were actually multi-valued. this allows us to optimize the case where all segments contain
// single-values.
SortedNumericDocValues[] multiValuedDocVals = new SortedNumericDocValues[matchingDocs.size()];
SortedNumericDocValues[] multiValuedDocVals = null;
NumericDocValues[] singleValuedDocVals = null;
boolean foundMultiValued = false;
for (int i = 0; i < matchingDocs.size(); i++) {
FacetsCollector.MatchingDocs hits = matchingDocs.get(i);
if (hits.totalHits == 0) {
continue;
}
SortedNumericDocValues multiValues = DocValues.getSortedNumeric(hits.context.reader(), field);
if (multiValuedDocVals == null) {
multiValuedDocVals = new SortedNumericDocValues[matchingDocs.size()];
}
multiValuedDocVals[i] = multiValues;
// only bother trying to unwrap a singleton if we haven't yet seen any true multi-valued cases
@ -94,6 +104,11 @@ abstract class RangeFacetCounts extends FacetCountsWithFilterQuery {
}
}
if (multiValuedDocVals == null) {
// no hits or no doc values in all segments. nothing to count:
return;
}
// we only need to keep around one or the other at this point
if (foundMultiValued) {
singleValuedDocVals = null;
@ -101,7 +116,7 @@ abstract class RangeFacetCounts extends FacetCountsWithFilterQuery {
multiValuedDocVals = null;
}
LongRangeCounter counter = LongRangeCounter.create(getLongRanges(), counts);
LongRangeCounter counter = setupCounter();
int missingCount = 0;
@ -183,9 +198,15 @@ abstract class RangeFacetCounts extends FacetCountsWithFilterQuery {
@Override
public FacetResult getAllChildren(String dim, String... path) throws IOException {
validateDimAndPathForGetChildren(dim, path);
LabelAndValue[] labelValues = new LabelAndValue[counts.length];
for (int i = 0; i < counts.length; i++) {
labelValues[i] = new LabelAndValue(ranges[i].label, counts[i]);
LabelAndValue[] labelValues = new LabelAndValue[ranges.length];
if (counts == null) {
for (int i = 0; i < ranges.length; i++) {
labelValues[i] = new LabelAndValue(ranges[i].label, 0);
}
} else {
for (int i = 0; i < ranges.length; i++) {
labelValues[i] = new LabelAndValue(ranges[i].label, counts[i]);
}
}
return new FacetResult(dim, path, totCount, labelValues, labelValues.length);
}
@ -195,6 +216,11 @@ abstract class RangeFacetCounts extends FacetCountsWithFilterQuery {
validateTopN(topN);
validateDimAndPathForGetChildren(dim, path);
if (counts == null) {
assert totCount == 0;
return new FacetResult(dim, path, totCount, new LabelAndValue[0], 0);
}
PriorityQueue<Entry> pq =
new PriorityQueue<>(Math.min(topN, counts.length)) {
@Override
@ -251,7 +277,7 @@ abstract class RangeFacetCounts extends FacetCountsWithFilterQuery {
b.append(" ");
b.append(ranges[i].label);
b.append(" -> count=");
b.append(counts[i]);
b.append(counts != null ? counts[i] : 0);
b.append('\n');
}
return b.toString();

View File

@ -19,6 +19,7 @@ package org.apache.lucene.facet.sortedset;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
@ -68,6 +69,9 @@ abstract class AbstractSortedSetDocValueFacetCounts extends Facets {
@Override
public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException {
validateTopN(topN);
if (hasCounts() == false) {
return null;
}
TopChildrenForPath topChildrenForPath = getTopChildrenForPath(topN, dim, path);
return createFacetResult(topChildrenForPath, dim, path);
}
@ -80,6 +84,10 @@ abstract class AbstractSortedSetDocValueFacetCounts extends Facets {
return null;
}
if (hasCounts() == false) {
return null;
}
// Compute the actual results:
int pathCount = 0;
List<LabelAndValue> labelValues = new ArrayList<>();
@ -111,12 +119,17 @@ abstract class AbstractSortedSetDocValueFacetCounts extends Facets {
return -1;
}
return getCount(ord);
return hasCounts() == false ? 0 : getCount(ord);
}
@Override
public List<FacetResult> getAllDims(int topN) throws IOException {
validateTopN(topN);
if (hasCounts() == false) {
return Collections.emptyList();
}
List<FacetResult> results = new ArrayList<>();
for (String dim : state.getDims()) {
TopChildrenForPath topChildrenForPath = getTopChildrenForPath(topN, dim);
@ -136,6 +149,10 @@ abstract class AbstractSortedSetDocValueFacetCounts extends Facets {
validateTopN(topNDims);
validateTopN(topNChildren);
if (hasCounts() == false) {
return Collections.emptyList();
}
// Creates priority queue to store top dimensions and sort by their aggregated values/hits and
// string values.
PriorityQueue<DimValue> pq =
@ -230,6 +247,9 @@ abstract class AbstractSortedSetDocValueFacetCounts extends Facets {
return Arrays.asList(results);
}
/** Were any counts actually computed? (They may not be if there are no hits, etc.) */
abstract boolean hasCounts();
/** Retrieve the count for a specified ordinal. */
abstract int getCount(int ord);

View File

@ -77,6 +77,13 @@ public class ConcurrentSortedSetDocValuesFacetCounts extends AbstractSortedSetDo
}
}
@Override
boolean hasCounts() {
// TODO: safe to always assume there are counts, but maybe it would be more optimal to
// actually track if we see a count?
return true;
}
@Override
int getCount(int ord) {
return counts.get(ord);
@ -99,6 +106,11 @@ public class ConcurrentSortedSetDocValuesFacetCounts extends AbstractSortedSetDo
@Override
public Void call() throws IOException {
// If we're counting collected hits but there were none, short-circuit:
if (hits != null && hits.totalHits == 0) {
return null;
}
SortedSetDocValues multiValues = DocValues.getSortedSet(leafReader, field);
if (multiValues == null) {
// nothing to count here

View File

@ -56,7 +56,8 @@ import org.apache.lucene.util.LongValues;
* @lucene.experimental
*/
public class SortedSetDocValuesFacetCounts extends AbstractSortedSetDocValueFacetCounts {
final int[] counts;
private final SortedSetDocValuesReaderState state;
int[] counts;
/** Returns all facet counts, same result as searching on {@link MatchAllDocsQuery} but faster. */
public SortedSetDocValuesFacetCounts(SortedSetDocValuesReaderState state) throws IOException {
@ -67,7 +68,7 @@ public class SortedSetDocValuesFacetCounts extends AbstractSortedSetDocValueFace
public SortedSetDocValuesFacetCounts(SortedSetDocValuesReaderState state, FacetsCollector hits)
throws IOException {
super(state);
this.counts = new int[state.getSize()];
this.state = state;
if (hits == null) {
// browse only
countAll();
@ -76,6 +77,17 @@ public class SortedSetDocValuesFacetCounts extends AbstractSortedSetDocValueFace
}
}
private void initializeCounts() {
if (counts == null) {
counts = new int[state.getSize()];
}
}
@Override
boolean hasCounts() {
return counts != null;
}
@Override
int getCount(int ord) {
return counts[ord];
@ -90,6 +102,9 @@ public class SortedSetDocValuesFacetCounts extends AbstractSortedSetDocValueFace
return;
}
// Initialize counts:
initializeCounts();
// It's slightly more efficient to work against SortedDocValues if the field is actually
// single-valued (see: LUCENE-5309)
SortedDocValues singleValues = DocValues.unwrapSingleton(multiValues);
@ -159,12 +174,19 @@ public class SortedSetDocValuesFacetCounts extends AbstractSortedSetDocValueFace
private void countOneSegment(
OrdinalMap ordinalMap, LeafReader reader, int segOrd, MatchingDocs hits, Bits liveDocs)
throws IOException {
if (hits != null && hits.totalHits == 0) {
return;
}
SortedSetDocValues multiValues = DocValues.getSortedSet(reader, field);
if (multiValues == null) {
// nothing to count
return;
}
// Initialize counts:
initializeCounts();
// It's slightly more efficient to work against SortedDocValues if the field is actually
// single-valued (see: LUCENE-5309)
SortedDocValues singleValues = DocValues.unwrapSingleton(multiValues);

View File

@ -71,11 +71,15 @@ public class FastTaxonomyFacetCounts extends IntTaxonomyFacets {
private void count(List<MatchingDocs> matchingDocs) throws IOException {
for (MatchingDocs hits : matchingDocs) {
if (hits.totalHits == 0) {
continue;
}
SortedNumericDocValues multiValued =
hits.context.reader().getSortedNumericDocValues(indexFieldName);
if (multiValued == null) {
continue;
}
initializeValueCounters();
NumericDocValues singleValued = DocValues.unwrapSingleton(multiValued);
@ -114,13 +118,14 @@ public class FastTaxonomyFacetCounts extends IntTaxonomyFacets {
}
private void countAll(IndexReader reader) throws IOException {
assert values != null;
for (LeafReaderContext context : reader.leaves()) {
SortedNumericDocValues multiValued =
context.reader().getSortedNumericDocValues(indexFieldName);
if (multiValued == null) {
continue;
}
initializeValueCounters();
assert values != null;
Bits liveDocs = context.reader().getLiveDocs();

View File

@ -20,10 +20,12 @@ import com.carrotsearch.hppc.FloatArrayList;
import com.carrotsearch.hppc.IntArrayList;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.lucene.facet.FacetResult;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.FacetsConfig.DimConfig;
import org.apache.lucene.facet.LabelAndValue;
@ -39,22 +41,37 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets {
final AssociationAggregationFunction aggregationFunction;
/** Per-ordinal value. */
final float[] values;
float[] values;
/** Sole constructor. */
FloatTaxonomyFacets(
String indexFieldName,
TaxonomyReader taxoReader,
AssociationAggregationFunction aggregationFunction,
FacetsConfig config)
FacetsConfig config,
FacetsCollector fc)
throws IOException {
super(indexFieldName, taxoReader, config);
super(indexFieldName, taxoReader, config, fc);
this.aggregationFunction = aggregationFunction;
values = new float[taxoReader.getSize()];
}
@Override
boolean hasValues() {
return values != null;
}
void initializeValueCounters() {
if (values == null) {
values = new float[taxoReader.getSize()];
}
}
/** Rolls up any single-valued hierarchical dimensions. */
void rollup() throws IOException {
if (values == null) {
return;
}
// Rollup any necessary dims:
int[] children = getChildren();
for (Map.Entry<String, DimConfig> ent : config.getDimConfigs().entrySet()) {
@ -100,7 +117,7 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets {
if (ord < 0) {
return -1;
}
return values[ord];
return values == null ? 0 : values[ord];
}
@Override
@ -112,6 +129,10 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets {
return null;
}
if (values == null) {
return null;
}
int[] children = getChildren();
int[] siblings = getSiblings();
@ -166,6 +187,10 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets {
return null;
}
if (values == null) {
return null;
}
TopChildrenForPath topChildrenForPath = getTopChildrenForPath(dimConfig, dimOrd, topN);
return createFacetResult(topChildrenForPath, dim, path);
}
@ -264,6 +289,10 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets {
validateTopN(topNDims);
validateTopN(topNChildren);
if (values == null) {
return Collections.emptyList();
}
// get existing children and siblings ordinal array from TaxonomyFacets
int[] children = getChildren();
int[] siblings = getSiblings();

View File

@ -21,6 +21,7 @@ import com.carrotsearch.hppc.IntIntHashMap;
import com.carrotsearch.hppc.cursors.IntIntCursor;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@ -40,10 +41,13 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
final AssociationAggregationFunction aggregationFunction;
/** Dense ordinal values. */
final int[] values;
int[] values;
/** Sparse ordinal values. */
final IntIntHashMap sparseValues;
IntIntHashMap sparseValues;
/** Have value counters been initialized. */
boolean initialized;
/** Sole constructor. */
IntTaxonomyFacets(
@ -53,14 +57,24 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
AssociationAggregationFunction aggregationFunction,
FacetsCollector fc)
throws IOException {
super(indexFieldName, taxoReader, config);
super(indexFieldName, taxoReader, config, fc);
this.aggregationFunction = aggregationFunction;
}
@Override
boolean hasValues() {
return initialized;
}
void initializeValueCounters() {
if (initialized) {
return;
}
initialized = true;
assert sparseValues == null && values == null;
if (useHashTable(fc, taxoReader)) {
sparseValues = new IntIntHashMap();
values = null;
} else {
sparseValues = null;
values = new int[taxoReader.getSize()];
}
}
@ -85,6 +99,10 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
/** Rolls up any single-valued hierarchical dimensions. */
void rollup() throws IOException {
if (initialized == false) {
return;
}
// Rollup any necessary dims:
int[] children = null;
for (Map.Entry<String, DimConfig> ent : config.getDimConfigs().entrySet()) {
@ -161,7 +179,7 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
if (ord < 0) {
return -1;
}
return getValue(ord);
return initialized ? getValue(ord) : 0;
}
@Override
@ -173,6 +191,10 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
return null;
}
if (initialized == false) {
return null;
}
int aggregatedValue = 0;
IntArrayList ordinals = new IntArrayList();
@ -239,6 +261,10 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
return null;
}
if (initialized == false) {
return null;
}
TopChildrenForPath topChildrenForPath = getTopChildrenForPath(dimConfig, dimOrd, topN);
return createFacetResult(topChildrenForPath, dim, path);
}
@ -324,6 +350,10 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
throw new IllegalArgumentException("topN must be > 0");
}
if (initialized == false) {
return Collections.emptyList();
}
// get children and siblings ordinal array from TaxonomyFacets
int[] children = getChildren();
int[] siblings = getSiblings();

View File

@ -88,7 +88,7 @@ public class TaxonomyFacetFloatAssociations extends FloatTaxonomyFacets {
FacetsCollector fc,
AssociationAggregationFunction aggregationFunction)
throws IOException {
super(indexFieldName, taxoReader, aggregationFunction, config);
super(indexFieldName, taxoReader, aggregationFunction, config, fc);
aggregateValues(aggregationFunction, fc.getMatchingDocs());
}
@ -104,7 +104,7 @@ public class TaxonomyFacetFloatAssociations extends FloatTaxonomyFacets {
AssociationAggregationFunction aggregationFunction,
DoubleValuesSource valuesSource)
throws IOException {
super(indexFieldName, taxoReader, aggregationFunction, config);
super(indexFieldName, taxoReader, aggregationFunction, config, fc);
aggregateValues(aggregationFunction, fc.getMatchingDocs(), fc.getKeepScores(), valuesSource);
}
@ -134,6 +134,11 @@ public class TaxonomyFacetFloatAssociations extends FloatTaxonomyFacets {
DoubleValuesSource valueSource)
throws IOException {
for (MatchingDocs hits : matchingDocs) {
if (hits.totalHits == 0) {
continue;
}
initializeValueCounters();
SortedNumericDocValues ordinalValues =
DocValues.getSortedNumeric(hits.context.reader(), indexFieldName);
DoubleValues scores = keepScores ? scores(hits) : null;
@ -164,6 +169,11 @@ public class TaxonomyFacetFloatAssociations extends FloatTaxonomyFacets {
throws IOException {
for (MatchingDocs hits : matchingDocs) {
if (hits.totalHits == 0) {
continue;
}
initializeValueCounters();
BinaryDocValues dv = DocValues.getBinary(hits.context.reader(), indexFieldName);
DocIdSetIterator it =
ConjunctionUtils.intersectIterators(Arrays.asList(hits.bits.iterator(), dv));

View File

@ -63,6 +63,11 @@ public class TaxonomyFacetIntAssociations extends IntTaxonomyFacets {
AssociationAggregationFunction aggregationFunction, List<MatchingDocs> matchingDocs)
throws IOException {
for (MatchingDocs hits : matchingDocs) {
if (hits.totalHits == 0) {
continue;
}
initializeValueCounters();
BinaryDocValues dv = DocValues.getBinary(hits.context.reader(), indexFieldName);
DocIdSetIterator it = ConjunctionUtils.intersectIterators(List.of(hits.bits.iterator(), dv));

View File

@ -19,11 +19,13 @@ package org.apache.lucene.facet.taxonomy;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Locale;
import org.apache.lucene.facet.FacetResult;
import org.apache.lucene.facet.Facets;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.FacetsConfig.DimConfig;
@ -53,6 +55,9 @@ abstract class TaxonomyFacets extends Facets {
/** {@code FacetsConfig} provided to the constructor. */
final FacetsConfig config;
/** {@code FacetsCollector} provided to the constructor. */
final FacetsCollector fc;
/** Maps parent ordinal to its child, or -1 if the parent is childless. */
private int[] children;
@ -63,11 +68,13 @@ abstract class TaxonomyFacets extends Facets {
final int[] parents;
/** Sole constructor. */
TaxonomyFacets(String indexFieldName, TaxonomyReader taxoReader, FacetsConfig config)
TaxonomyFacets(
String indexFieldName, TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc)
throws IOException {
this.indexFieldName = indexFieldName;
this.taxoReader = taxoReader;
this.config = config;
this.fc = fc;
parents = taxoReader.getParallelTaxonomyArrays().parents();
}
@ -138,6 +145,11 @@ abstract class TaxonomyFacets extends Facets {
@Override
public List<FacetResult> getAllDims(int topN) throws IOException {
validateTopN(topN);
if (hasValues() == false) {
return Collections.emptyList();
}
int[] children = getChildren();
int[] siblings = getSiblings();
int ord = children[TaxonomyReader.ROOT_ORDINAL];
@ -158,4 +170,7 @@ abstract class TaxonomyFacets extends Facets {
results.sort(BY_VALUE_THEN_DIM);
return results;
}
/** Were any values actually aggregated during counting? */
abstract boolean hasValues();
}