Initialize facet counting data structures lazily (#12408)

This change covers:
* Taxonomy faceting
  * FastTaxonomyFacetCounts
  * TaxonomyFacetIntAssociations
  * TaxonomyFacetFloatAssociations
* SSDV faceting
  * SortedSetDocValuesFacetCounts
  * ConcurrentSortedSetDocValuesFacetCounts
  * StringValueFacetCounts
* Range faceting
  * LongRangeFacetCounts
  * DoubleRangeFacetCounts
* Long faceting
  * LongValueFacetCounts

Left for a future iteration:
* RangeOnRange faceting
* FacetSet faceting
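The common pattern across all of these implementations: leave the counting structures (arrays, hash maps) unallocated until the first segment that actually has hits to count, and make the read-side methods return an empty result when nothing was ever counted. Below is a minimal, self-contained sketch of that pattern; the LazyCounts class and its integer-ordinal API are illustrative only, not code from this commit.

import java.util.List;

// Illustrative sketch only: counters stay null until the first segment with hits,
// and the read path reports zero without ever allocating when nothing was counted.
class LazyCounts {
  private int[] counts;        // allocated lazily, like LongValueFacetCounts#counts
  private boolean initialized; // whether counters have been created yet
  private final int size;

  LazyCounts(int size) {
    this.size = size;
  }

  private void initializeCounters() {
    if (initialized) {
      return;
    }
    initialized = true;
    counts = new int[size];
  }

  /** One call per segment; 'ordinals' stands in for the matching docs' facet ordinals. */
  void countSegment(List<Integer> ordinals) {
    if (ordinals.isEmpty()) {
      return; // segment contributed no hits: skip it entirely, no allocation
    }
    initializeCounters();
    for (int ord : ordinals) {
      counts[ord]++;
    }
  }

  /** Read path tolerates the "nothing counted" case without allocating. */
  int getCount(int ord) {
    return initialized ? counts[ord] : 0;
  }
}

The payoff is for queries whose hit sets touch only some segments, or none at all: the potentially large per-ordinal counting structures are allocated only when at least one segment contributes hits, which is exactly what the per-file changes below do for each Facets implementation.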
Author: Greg Miller, 2023-07-25 12:20:42 -07:00 (committed by GitHub)
Parent: 2b3b028734
Commit: 179b45bc23
15 changed files with 366 additions and 85 deletions

CHANGES.txt

@@ -161,6 +161,9 @@ Optimizations
 * GITHUB#12453: Faster bulk numeric reads from BufferedIndexInput (Armin Braun)
+* GITHUB#12408: Lazy initialization improvements for Facets implementations when there are segments with no hits
+  to count. (Greg Miller)
 Bug Fixes
 ---------------------

LongValueFacetCounts.java

@@ -51,10 +51,13 @@ import org.apache.lucene.util.PriorityQueue;
 public class LongValueFacetCounts extends Facets {
   /** Used for all values that are < 1K. */
-  private final int[] counts = new int[1024];
+  private int[] counts;
   /** Used for all values that are >= 1K. */
-  private final LongIntHashMap hashCounts = new LongIntHashMap();
+  private LongIntHashMap hashCounts;
+  /** Whether-or-not counters have been initialized. */
+  private boolean initialized;
   /** Field being counted. */
   private final String field;
@@ -125,6 +128,7 @@ public class LongValueFacetCounts extends Facets {
   public LongValueFacetCounts(String field, LongValuesSource valueSource, IndexReader reader)
       throws IOException {
     this.field = field;
+    initializeCounters();
     if (valueSource != null) {
       countAll(reader, valueSource);
     } else {
@@ -141,6 +145,7 @@ public class LongValueFacetCounts extends Facets {
   public LongValueFacetCounts(String field, MultiLongValuesSource valuesSource, IndexReader reader)
       throws IOException {
     this.field = field;
+    initializeCounters();
     if (valuesSource != null) {
       LongValuesSource singleValued = MultiLongValuesSource.unwrapSingleton(valuesSource);
       if (singleValued != null) {
@@ -153,11 +158,25 @@ public class LongValueFacetCounts extends Facets {
     }
   }
+  private void initializeCounters() {
+    if (initialized) {
+      return;
+    }
+    assert counts == null && hashCounts == null;
+    initialized = true;
+    counts = new int[1024];
+    hashCounts = new LongIntHashMap();
+  }
   /** Counts from the provided valueSource. */
   private void count(LongValuesSource valueSource, List<MatchingDocs> matchingDocs)
       throws IOException {
     for (MatchingDocs hits : matchingDocs) {
+      if (hits.totalHits == 0) {
+        continue;
+      }
+      initializeCounters();
       LongValues fv = valueSource.getValues(hits.context, null);
@@ -183,6 +202,10 @@ public class LongValueFacetCounts extends Facets {
   private void count(MultiLongValuesSource valuesSource, List<MatchingDocs> matchingDocs)
       throws IOException {
     for (MatchingDocs hits : matchingDocs) {
+      if (hits.totalHits == 0) {
+        continue;
+      }
+      initializeCounters();
       MultiLongValues multiValues = valuesSource.getValues(hits.context);
@@ -213,6 +236,10 @@ public class LongValueFacetCounts extends Facets {
   /** Counts from the field's indexed doc values. */
   private void count(String field, List<MatchingDocs> matchingDocs) throws IOException {
     for (MatchingDocs hits : matchingDocs) {
+      if (hits.totalHits == 0) {
+        continue;
+      }
+      initializeCounters();
       SortedNumericDocValues multiValues = DocValues.getSortedNumeric(hits.context.reader(), field);
       NumericDocValues singleValues = DocValues.unwrapSingleton(multiValues);
@@ -350,6 +377,13 @@ public class LongValueFacetCounts extends Facets {
   @Override
   public FacetResult getAllChildren(String dim, String... path) throws IOException {
     validateDimAndPathForGetChildren(dim, path);
+    if (initialized == false) {
+      // nothing was counted (either no hits or no values for all hits):
+      assert totCount == 0;
+      return new FacetResult(field, new String[0], totCount, new LabelAndValue[0], 0);
+    }
     List<LabelAndValue> labelValues = new ArrayList<>();
     for (int i = 0; i < counts.length; i++) {
       if (counts[i] != 0) {
@@ -378,6 +412,12 @@ public class LongValueFacetCounts extends Facets {
     validateTopN(topN);
     validateDimAndPathForGetChildren(dim, path);
+    if (initialized == false) {
+      // nothing was counted (either no hits or no values for all hits):
+      assert totCount == 0;
+      return new FacetResult(field, new String[0], totCount, new LabelAndValue[0], 0);
+    }
     PriorityQueue<Entry> pq =
         new PriorityQueue<>(Math.min(topN, counts.length + hashCounts.size())) {
           @Override
@@ -440,6 +480,12 @@ public class LongValueFacetCounts extends Facets {
    * efficient to use {@link #getAllChildren(String, String...)}.
    */
   public FacetResult getAllChildrenSortByValue() {
+    if (initialized == false) {
+      // nothing was counted (either no hits or no values for all hits):
+      assert totCount == 0;
+      return new FacetResult(field, new String[0], totCount, new LabelAndValue[0], 0);
+    }
     List<LabelAndValue> labelValues = new ArrayList<>();
     // compact & sort hash table's arrays by value
@@ -533,6 +579,7 @@ public class LongValueFacetCounts extends Facets {
     StringBuilder b = new StringBuilder();
     b.append("LongValueFacetCounts totCount=");
     b.append(totCount);
+    if (initialized) {
     b.append(":\n");
     for (int i = 0; i < counts.length; i++) {
       if (counts[i] != 0) {
@@ -555,6 +602,7 @@ public class LongValueFacetCounts extends Facets {
         }
       }
     }
+    }
     return b.toString();
   }

StringValueFacetCounts.java

@@ -69,8 +69,9 @@ public class StringValueFacetCounts extends Facets {
   private final OrdinalMap ordinalMap;
   private final SortedSetDocValues docValues;
-  private final int[] denseCounts;
+  private int[] denseCounts;
   private final IntIntHashMap sparseCounts;
+  private boolean initialized;
   private final int cardinality;
   private int totalDocCount;
@@ -101,7 +102,9 @@ public class StringValueFacetCounts extends Facets {
     if (facetsCollector != null) {
       if (cardinality < 1024) { // count densely for low cardinality
         sparseCounts = null;
-        denseCounts = new int[cardinality];
+        denseCounts = null;
+        initialized = false;
+        count(facetsCollector);
       } else {
         int totalHits = 0;
         int totalDocs = 0;
@@ -110,22 +113,31 @@ public class StringValueFacetCounts extends Facets {
           totalDocs += matchingDocs.context.reader().maxDoc();
         }
+        // No counting needed if there are no hits:
+        if (totalHits == 0) {
+          sparseCounts = null;
+          denseCounts = null;
+          initialized = true;
+        } else {
           // If our result set is < 10% of the index, we collect sparsely (use hash map). This
           // heuristic is borrowed from IntTaxonomyFacetCounts:
           if (totalHits < totalDocs / 10) {
             sparseCounts = new IntIntHashMap();
             denseCounts = null;
+            initialized = true;
           } else {
             sparseCounts = null;
             denseCounts = new int[cardinality];
+            initialized = true;
           }
+        }
         count(facetsCollector);
+      }
+    }
     } else {
       // Since we're counting all ordinals, count densely:
       sparseCounts = null;
       denseCounts = new int[cardinality];
+      initialized = true;
       countAll();
     }
@@ -294,6 +306,9 @@ public class StringValueFacetCounts extends Facets {
     if (matchingDocs.size() == 1) {
       FacetsCollector.MatchingDocs hits = matchingDocs.get(0);
+      if (hits.totalHits == 0) {
+        return;
+      }
       // Validate state before doing anything else:
       validateState(hits.context);
@@ -314,6 +329,10 @@ public class StringValueFacetCounts extends Facets {
         assert ordinalMap != null;
         assert docValues instanceof MultiDocValues.MultiSortedSetDocValues;
+        if (hits.totalHits == 0) {
+          continue;
+        }
         MultiDocValues.MultiSortedSetDocValues multiValues =
             (MultiDocValues.MultiSortedSetDocValues) docValues;
@@ -368,6 +387,13 @@ public class StringValueFacetCounts extends Facets {
       FacetsCollector.MatchingDocs hits,
       Bits liveDocs)
       throws IOException {
+    if (initialized == false) {
+      assert denseCounts == null && sparseCounts == null;
+      // If the counters weren't initialized, we can assume the cardinality is low enough that
+      // dense counting will be preferrable:
+      denseCounts = new int[cardinality];
+      initialized = true;
+    }
     // It's slightly more efficient to work against SortedDocValues if the field is actually
     // single-valued (see: LUCENE-5309)

DoubleRangeFacetCounts.java

@@ -157,20 +157,25 @@ public class DoubleRangeFacetCounts extends RangeFacetCounts {
   private void count(DoubleValuesSource valueSource, List<MatchingDocs> matchingDocs)
       throws IOException {
-    LongRange[] longRanges = getLongRanges();
-    LongRangeCounter counter = LongRangeCounter.create(longRanges, counts);
+    LongRangeCounter counter = null;
     int missingCount = 0;
     for (MatchingDocs hits : matchingDocs) {
-      DoubleValues fv = valueSource.getValues(hits.context, null);
-      totCount += hits.totalHits;
+      if (hits.totalHits == 0) {
+        continue;
+      }
       final DocIdSetIterator it = createIterator(hits);
       if (it == null) {
         continue;
       }
+      if (counter == null) {
+        counter = setupCounter();
+      }
+      DoubleValues fv = valueSource.getValues(hits.context, null);
+      totCount += hits.totalHits;
       for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; ) {
         // Skip missing docs:
         if (fv.advanceExact(doc)) {
@@ -183,27 +188,34 @@ public class DoubleRangeFacetCounts extends RangeFacetCounts {
       }
     }
+    if (counter != null) {
       missingCount += counter.finish();
       totCount -= missingCount;
     }
+  }
   /** Counts from the provided valueSource. */
   private void count(MultiDoubleValuesSource valueSource, List<MatchingDocs> matchingDocs)
       throws IOException {
-    LongRange[] longRanges = getLongRanges();
-    LongRangeCounter counter = LongRangeCounter.create(longRanges, counts);
+    LongRangeCounter counter = null; // LongRangeCounter.create(longRanges, counts);
     int missingCount = 0;
     for (MatchingDocs hits : matchingDocs) {
-      MultiDoubleValues multiValues = valueSource.getValues(hits.context);
+      if (hits.totalHits == 0) {
+        continue;
+      }
       final DocIdSetIterator it = createIterator(hits);
       if (it == null) {
         continue;
       }
+      if (counter == null) {
+        counter = setupCounter();
+      }
+      MultiDoubleValues multiValues = valueSource.getValues(hits.context);
       for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; ) {
         // Skip missing docs:
         if (multiValues.advanceExact(doc)) {
@@ -232,9 +244,11 @@ public class DoubleRangeFacetCounts extends RangeFacetCounts {
       }
     }
+    if (counter != null) {
       missingCount += counter.finish();
       totCount -= missingCount;
     }
+  }
   /** Create long ranges from the double ranges. */
   @Override

LongRangeFacetCounts.java

@@ -128,21 +128,27 @@ public class LongRangeFacetCounts extends RangeFacetCounts {
   private void count(LongValuesSource valueSource, List<MatchingDocs> matchingDocs)
       throws IOException {
-    LongRange[] ranges = getLongRanges();
-    LongRangeCounter counter = LongRangeCounter.create(ranges, counts);
+    LongRangeCounter counter = null;
     int missingCount = 0;
     for (MatchingDocs hits : matchingDocs) {
-      LongValues fv = valueSource.getValues(hits.context, null);
-      totCount += hits.totalHits;
+      if (hits.totalHits == 0) {
+        continue;
+      }
       final DocIdSetIterator it = createIterator(hits);
       if (it == null) {
         continue;
       }
+      if (counter == null) {
+        counter = setupCounter();
+      }
+      LongValues fv = valueSource.getValues(hits.context, null);
+      totCount += hits.totalHits;
       for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; ) {
         // Skip missing docs:
         if (fv.advanceExact(doc)) {
@@ -155,26 +161,34 @@ public class LongRangeFacetCounts extends RangeFacetCounts {
       }
     }
+    if (counter != null) {
       missingCount += counter.finish();
       totCount -= missingCount;
     }
+  }
   /** Counts from the provided valueSource. */
   private void count(MultiLongValuesSource valueSource, List<MatchingDocs> matchingDocs)
       throws IOException {
-    LongRange[] ranges = getLongRanges();
-    LongRangeCounter counter = LongRangeCounter.create(ranges, counts);
+    LongRangeCounter counter = null;
     for (MatchingDocs hits : matchingDocs) {
-      MultiLongValues multiValues = valueSource.getValues(hits.context);
+      if (hits.totalHits == 0) {
+        continue;
+      }
       final DocIdSetIterator it = createIterator(hits);
       if (it == null) {
         continue;
       }
+      if (counter == null) {
+        counter = setupCounter();
+      }
+      MultiLongValues multiValues = valueSource.getValues(hits.context);
       for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; ) {
         // Skip missing docs:
         if (multiValues.advanceExact(doc)) {
@@ -203,9 +217,11 @@ public class LongRangeFacetCounts extends RangeFacetCounts {
       }
     }
+    if (counter != null) {
       int missingCount = counter.finish();
       totCount -= missingCount;
     }
+  }
   @Override
   protected LongRange[] getLongRanges() {

RangeFacetCounts.java

@@ -39,8 +39,8 @@ abstract class RangeFacetCounts extends FacetCountsWithFilterQuery {
   /** Ranges passed to constructor. */
   protected final Range[] ranges;
-  /** Counts, initialized in by subclass. */
-  protected final int[] counts;
+  /** Counts. */
+  protected int[] counts;
   /** Our field name. */
   protected final String field;
@@ -53,7 +53,6 @@ abstract class RangeFacetCounts extends FacetCountsWithFilterQuery {
     super(fastMatchQuery);
     this.field = field;
     this.ranges = ranges;
-    counts = new int[ranges.length];
   }
   protected abstract LongRange[] getLongRanges();
@@ -62,6 +61,12 @@ abstract class RangeFacetCounts extends FacetCountsWithFilterQuery {
     return l;
   }
+  protected LongRangeCounter setupCounter() {
+    assert counts == null;
+    counts = new int[ranges.length];
+    return LongRangeCounter.create(getLongRanges(), counts);
+  }
   /** Counts from the provided field. */
   protected void count(String field, List<FacetsCollector.MatchingDocs> matchingDocs)
       throws IOException {
@@ -69,15 +74,20 @@ abstract class RangeFacetCounts extends FacetCountsWithFilterQuery {
     // load doc values for all segments up front and keep track of whether-or-not we found any that
     // were actually multi-valued. this allows us to optimize the case where all segments contain
     // single-values.
-    SortedNumericDocValues[] multiValuedDocVals = new SortedNumericDocValues[matchingDocs.size()];
+    SortedNumericDocValues[] multiValuedDocVals = null;
     NumericDocValues[] singleValuedDocVals = null;
     boolean foundMultiValued = false;
     for (int i = 0; i < matchingDocs.size(); i++) {
       FacetsCollector.MatchingDocs hits = matchingDocs.get(i);
+      if (hits.totalHits == 0) {
+        continue;
+      }
       SortedNumericDocValues multiValues = DocValues.getSortedNumeric(hits.context.reader(), field);
+      if (multiValuedDocVals == null) {
+        multiValuedDocVals = new SortedNumericDocValues[matchingDocs.size()];
+      }
       multiValuedDocVals[i] = multiValues;
       // only bother trying to unwrap a singleton if we haven't yet seen any true multi-valued cases
@@ -94,6 +104,11 @@ abstract class RangeFacetCounts extends FacetCountsWithFilterQuery {
       }
     }
+    if (multiValuedDocVals == null) {
+      // no hits or no doc values in all segments. nothing to count:
+      return;
+    }
     // we only need to keep around one or the other at this point
     if (foundMultiValued) {
       singleValuedDocVals = null;
@@ -101,7 +116,7 @@ abstract class RangeFacetCounts extends FacetCountsWithFilterQuery {
       multiValuedDocVals = null;
     }
-    LongRangeCounter counter = LongRangeCounter.create(getLongRanges(), counts);
+    LongRangeCounter counter = setupCounter();
     int missingCount = 0;
@@ -183,10 +198,16 @@ abstract class RangeFacetCounts extends FacetCountsWithFilterQuery {
   @Override
   public FacetResult getAllChildren(String dim, String... path) throws IOException {
     validateDimAndPathForGetChildren(dim, path);
-    LabelAndValue[] labelValues = new LabelAndValue[counts.length];
-    for (int i = 0; i < counts.length; i++) {
+    LabelAndValue[] labelValues = new LabelAndValue[ranges.length];
+    if (counts == null) {
+      for (int i = 0; i < ranges.length; i++) {
+        labelValues[i] = new LabelAndValue(ranges[i].label, 0);
+      }
+    } else {
+      for (int i = 0; i < ranges.length; i++) {
       labelValues[i] = new LabelAndValue(ranges[i].label, counts[i]);
     }
+    }
     return new FacetResult(dim, path, totCount, labelValues, labelValues.length);
   }
@@ -195,6 +216,11 @@ abstract class RangeFacetCounts extends FacetCountsWithFilterQuery {
     validateTopN(topN);
     validateDimAndPathForGetChildren(dim, path);
+    if (counts == null) {
+      assert totCount == 0;
+      return new FacetResult(dim, path, totCount, new LabelAndValue[0], 0);
+    }
     PriorityQueue<Entry> pq =
         new PriorityQueue<>(Math.min(topN, counts.length)) {
           @Override
@@ -251,7 +277,7 @@ abstract class RangeFacetCounts extends FacetCountsWithFilterQuery {
       b.append(" ");
       b.append(ranges[i].label);
       b.append(" -> count=");
-      b.append(counts[i]);
+      b.append(counts != null ? counts[i] : 0);
       b.append('\n');
     }
     return b.toString();

AbstractSortedSetDocValueFacetCounts.java

@@ -19,6 +19,7 @@ package org.apache.lucene.facet.sortedset;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collections;
 import java.util.Comparator;
 import java.util.HashMap;
 import java.util.List;
@@ -68,6 +69,9 @@ abstract class AbstractSortedSetDocValueFacetCounts extends Facets {
   @Override
   public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException {
     validateTopN(topN);
+    if (hasCounts() == false) {
+      return null;
+    }
     TopChildrenForPath topChildrenForPath = getTopChildrenForPath(topN, dim, path);
     return createFacetResult(topChildrenForPath, dim, path);
   }
@@ -80,6 +84,10 @@ abstract class AbstractSortedSetDocValueFacetCounts extends Facets {
       return null;
     }
+    if (hasCounts() == false) {
+      return null;
+    }
     // Compute the actual results:
     int pathCount = 0;
     List<LabelAndValue> labelValues = new ArrayList<>();
@@ -111,12 +119,17 @@ abstract class AbstractSortedSetDocValueFacetCounts extends Facets {
       return -1;
     }
-    return getCount(ord);
+    return hasCounts() == false ? 0 : getCount(ord);
   }
   @Override
   public List<FacetResult> getAllDims(int topN) throws IOException {
     validateTopN(topN);
+    if (hasCounts() == false) {
+      return Collections.emptyList();
+    }
     List<FacetResult> results = new ArrayList<>();
     for (String dim : state.getDims()) {
       TopChildrenForPath topChildrenForPath = getTopChildrenForPath(topN, dim);
@@ -136,6 +149,10 @@ abstract class AbstractSortedSetDocValueFacetCounts extends Facets {
     validateTopN(topNDims);
     validateTopN(topNChildren);
+    if (hasCounts() == false) {
+      return Collections.emptyList();
+    }
     // Creates priority queue to store top dimensions and sort by their aggregated values/hits and
     // string values.
     PriorityQueue<DimValue> pq =
@@ -230,6 +247,9 @@ abstract class AbstractSortedSetDocValueFacetCounts extends Facets {
     return Arrays.asList(results);
   }
+  /** Were any counts actually computed? (They may not be if there are no hits, etc.) */
+  abstract boolean hasCounts();
   /** Retrieve the count for a specified ordinal. */
   abstract int getCount(int ord);

ConcurrentSortedSetDocValuesFacetCounts.java

@@ -77,6 +77,13 @@ public class ConcurrentSortedSetDocValuesFacetCounts extends AbstractSortedSetDocValueFacetCounts {
     }
   }
+  @Override
+  boolean hasCounts() {
+    // TODO: safe to always assume there are counts, but maybe it would be more optimal to
+    // actually track if we see a count?
+    return true;
+  }
   @Override
   int getCount(int ord) {
     return counts.get(ord);
@@ -99,6 +106,11 @@ public class ConcurrentSortedSetDocValuesFacetCounts extends AbstractSortedSetDocValueFacetCounts {
     @Override
     public Void call() throws IOException {
+      // If we're counting collected hits but there were none, short-circuit:
+      if (hits != null && hits.totalHits == 0) {
+        return null;
+      }
       SortedSetDocValues multiValues = DocValues.getSortedSet(leafReader, field);
       if (multiValues == null) {
         // nothing to count here

SortedSetDocValuesFacetCounts.java

@@ -56,7 +56,8 @@ import org.apache.lucene.util.LongValues;
  * @lucene.experimental
  */
 public class SortedSetDocValuesFacetCounts extends AbstractSortedSetDocValueFacetCounts {
-  final int[] counts;
+  private final SortedSetDocValuesReaderState state;
+  int[] counts;
   /** Returns all facet counts, same result as searching on {@link MatchAllDocsQuery} but faster. */
   public SortedSetDocValuesFacetCounts(SortedSetDocValuesReaderState state) throws IOException {
@@ -67,7 +68,7 @@ public class SortedSetDocValuesFacetCounts extends AbstractSortedSetDocValueFacetCounts {
   public SortedSetDocValuesFacetCounts(SortedSetDocValuesReaderState state, FacetsCollector hits)
       throws IOException {
     super(state);
-    this.counts = new int[state.getSize()];
+    this.state = state;
     if (hits == null) {
       // browse only
       countAll();
@@ -76,6 +77,17 @@ public class SortedSetDocValuesFacetCounts extends AbstractSortedSetDocValueFacetCounts {
     }
   }
+  private void initializeCounts() {
+    if (counts == null) {
+      counts = new int[state.getSize()];
+    }
+  }
+  @Override
+  boolean hasCounts() {
+    return counts != null;
+  }
   @Override
   int getCount(int ord) {
     return counts[ord];
@@ -90,6 +102,9 @@ public class SortedSetDocValuesFacetCounts extends AbstractSortedSetDocValueFacetCounts {
       return;
     }
+    // Initialize counts:
+    initializeCounts();
     // It's slightly more efficient to work against SortedDocValues if the field is actually
     // single-valued (see: LUCENE-5309)
     SortedDocValues singleValues = DocValues.unwrapSingleton(multiValues);
@@ -159,12 +174,19 @@ public class SortedSetDocValuesFacetCounts extends AbstractSortedSetDocValueFacetCounts {
   private void countOneSegment(
       OrdinalMap ordinalMap, LeafReader reader, int segOrd, MatchingDocs hits, Bits liveDocs)
       throws IOException {
+    if (hits != null && hits.totalHits == 0) {
+      return;
+    }
     SortedSetDocValues multiValues = DocValues.getSortedSet(reader, field);
     if (multiValues == null) {
       // nothing to count
       return;
     }
+    // Initialize counts:
+    initializeCounts();
     // It's slightly more efficient to work against SortedDocValues if the field is actually
     // single-valued (see: LUCENE-5309)
     SortedDocValues singleValues = DocValues.unwrapSingleton(multiValues);

FastTaxonomyFacetCounts.java

@@ -71,11 +71,15 @@ public class FastTaxonomyFacetCounts extends IntTaxonomyFacets {
   private void count(List<MatchingDocs> matchingDocs) throws IOException {
     for (MatchingDocs hits : matchingDocs) {
+      if (hits.totalHits == 0) {
+        continue;
+      }
       SortedNumericDocValues multiValued =
           hits.context.reader().getSortedNumericDocValues(indexFieldName);
       if (multiValued == null) {
         continue;
       }
+      initializeValueCounters();
       NumericDocValues singleValued = DocValues.unwrapSingleton(multiValued);
@@ -114,13 +118,14 @@ public class FastTaxonomyFacetCounts extends IntTaxonomyFacets {
   }
   private void countAll(IndexReader reader) throws IOException {
-    assert values != null;
     for (LeafReaderContext context : reader.leaves()) {
       SortedNumericDocValues multiValued =
           context.reader().getSortedNumericDocValues(indexFieldName);
       if (multiValued == null) {
         continue;
       }
+      initializeValueCounters();
+      assert values != null;
       Bits liveDocs = context.reader().getLiveDocs();

FloatTaxonomyFacets.java

@@ -20,10 +20,12 @@ import com.carrotsearch.hppc.FloatArrayList;
 import com.carrotsearch.hppc.IntArrayList;
 import java.io.IOException;
 import java.util.Arrays;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import org.apache.lucene.facet.FacetResult;
+import org.apache.lucene.facet.FacetsCollector;
 import org.apache.lucene.facet.FacetsConfig;
 import org.apache.lucene.facet.FacetsConfig.DimConfig;
 import org.apache.lucene.facet.LabelAndValue;
@@ -39,22 +41,37 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets {
   final AssociationAggregationFunction aggregationFunction;
   /** Per-ordinal value. */
-  final float[] values;
+  float[] values;
   /** Sole constructor. */
   FloatTaxonomyFacets(
       String indexFieldName,
       TaxonomyReader taxoReader,
       AssociationAggregationFunction aggregationFunction,
-      FacetsConfig config)
+      FacetsConfig config,
+      FacetsCollector fc)
       throws IOException {
-    super(indexFieldName, taxoReader, config);
+    super(indexFieldName, taxoReader, config, fc);
     this.aggregationFunction = aggregationFunction;
+  }
+  @Override
+  boolean hasValues() {
+    return values != null;
+  }
+  void initializeValueCounters() {
+    if (values == null) {
     values = new float[taxoReader.getSize()];
   }
+  }
   /** Rolls up any single-valued hierarchical dimensions. */
   void rollup() throws IOException {
+    if (values == null) {
+      return;
+    }
     // Rollup any necessary dims:
     int[] children = getChildren();
     for (Map.Entry<String, DimConfig> ent : config.getDimConfigs().entrySet()) {
@@ -100,7 +117,7 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets {
     if (ord < 0) {
       return -1;
     }
-    return values[ord];
+    return values == null ? 0 : values[ord];
   }
   @Override
@@ -112,6 +129,10 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets {
       return null;
     }
+    if (values == null) {
+      return null;
+    }
     int[] children = getChildren();
     int[] siblings = getSiblings();
@@ -166,6 +187,10 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets {
       return null;
     }
+    if (values == null) {
+      return null;
+    }
     TopChildrenForPath topChildrenForPath = getTopChildrenForPath(dimConfig, dimOrd, topN);
     return createFacetResult(topChildrenForPath, dim, path);
   }
@@ -264,6 +289,10 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets {
     validateTopN(topNDims);
     validateTopN(topNChildren);
+    if (values == null) {
+      return Collections.emptyList();
+    }
     // get existing children and siblings ordinal array from TaxonomyFacets
     int[] children = getChildren();
     int[] siblings = getSiblings();

IntTaxonomyFacets.java

@@ -21,6 +21,7 @@ import com.carrotsearch.hppc.IntIntHashMap;
 import com.carrotsearch.hppc.cursors.IntIntCursor;
 import java.io.IOException;
 import java.util.Arrays;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -40,10 +41,13 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
   final AssociationAggregationFunction aggregationFunction;
   /** Dense ordinal values. */
-  final int[] values;
+  int[] values;
   /** Sparse ordinal values. */
-  final IntIntHashMap sparseValues;
+  IntIntHashMap sparseValues;
+  /** Have value counters been initialized. */
+  boolean initialized;
   /** Sole constructor. */
   IntTaxonomyFacets(
@@ -53,14 +57,24 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
       AssociationAggregationFunction aggregationFunction,
       FacetsCollector fc)
       throws IOException {
-    super(indexFieldName, taxoReader, config);
+    super(indexFieldName, taxoReader, config, fc);
     this.aggregationFunction = aggregationFunction;
+  }
+  @Override
+  boolean hasValues() {
+    return initialized;
+  }
+  void initializeValueCounters() {
+    if (initialized) {
+      return;
+    }
+    initialized = true;
+    assert sparseValues == null && values == null;
     if (useHashTable(fc, taxoReader)) {
       sparseValues = new IntIntHashMap();
-      values = null;
     } else {
-      sparseValues = null;
       values = new int[taxoReader.getSize()];
     }
   }
@@ -85,6 +99,10 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
   /** Rolls up any single-valued hierarchical dimensions. */
   void rollup() throws IOException {
+    if (initialized == false) {
+      return;
+    }
     // Rollup any necessary dims:
     int[] children = null;
     for (Map.Entry<String, DimConfig> ent : config.getDimConfigs().entrySet()) {
@@ -161,7 +179,7 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
     if (ord < 0) {
       return -1;
     }
-    return getValue(ord);
+    return initialized ? getValue(ord) : 0;
   }
   @Override
@@ -173,6 +191,10 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
       return null;
     }
+    if (initialized == false) {
+      return null;
+    }
     int aggregatedValue = 0;
     IntArrayList ordinals = new IntArrayList();
@@ -239,6 +261,10 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
       return null;
     }
+    if (initialized == false) {
+      return null;
+    }
     TopChildrenForPath topChildrenForPath = getTopChildrenForPath(dimConfig, dimOrd, topN);
     return createFacetResult(topChildrenForPath, dim, path);
   }
@@ -324,6 +350,10 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
       throw new IllegalArgumentException("topN must be > 0");
     }
+    if (initialized == false) {
+      return Collections.emptyList();
+    }
     // get children and siblings ordinal array from TaxonomyFacets
     int[] children = getChildren();
     int[] siblings = getSiblings();

TaxonomyFacetFloatAssociations.java

@@ -88,7 +88,7 @@ public class TaxonomyFacetFloatAssociations extends FloatTaxonomyFacets {
       FacetsCollector fc,
       AssociationAggregationFunction aggregationFunction)
       throws IOException {
-    super(indexFieldName, taxoReader, aggregationFunction, config);
+    super(indexFieldName, taxoReader, aggregationFunction, config, fc);
     aggregateValues(aggregationFunction, fc.getMatchingDocs());
   }
@@ -104,7 +104,7 @@ public class TaxonomyFacetFloatAssociations extends FloatTaxonomyFacets {
       AssociationAggregationFunction aggregationFunction,
       DoubleValuesSource valuesSource)
       throws IOException {
-    super(indexFieldName, taxoReader, aggregationFunction, config);
+    super(indexFieldName, taxoReader, aggregationFunction, config, fc);
     aggregateValues(aggregationFunction, fc.getMatchingDocs(), fc.getKeepScores(), valuesSource);
   }
@@ -134,6 +134,11 @@ public class TaxonomyFacetFloatAssociations extends FloatTaxonomyFacets {
       DoubleValuesSource valueSource)
       throws IOException {
     for (MatchingDocs hits : matchingDocs) {
+      if (hits.totalHits == 0) {
+        continue;
+      }
+      initializeValueCounters();
       SortedNumericDocValues ordinalValues =
           DocValues.getSortedNumeric(hits.context.reader(), indexFieldName);
       DoubleValues scores = keepScores ? scores(hits) : null;
@@ -164,6 +169,11 @@ public class TaxonomyFacetFloatAssociations extends FloatTaxonomyFacets {
       throws IOException {
     for (MatchingDocs hits : matchingDocs) {
+      if (hits.totalHits == 0) {
+        continue;
+      }
+      initializeValueCounters();
       BinaryDocValues dv = DocValues.getBinary(hits.context.reader(), indexFieldName);
       DocIdSetIterator it =
           ConjunctionUtils.intersectIterators(Arrays.asList(hits.bits.iterator(), dv));

TaxonomyFacetIntAssociations.java

@@ -63,6 +63,11 @@ public class TaxonomyFacetIntAssociations extends IntTaxonomyFacets {
       AssociationAggregationFunction aggregationFunction, List<MatchingDocs> matchingDocs)
       throws IOException {
     for (MatchingDocs hits : matchingDocs) {
+      if (hits.totalHits == 0) {
+        continue;
+      }
+      initializeValueCounters();
       BinaryDocValues dv = DocValues.getBinary(hits.context.reader(), indexFieldName);
       DocIdSetIterator it = ConjunctionUtils.intersectIterators(List.of(hits.bits.iterator(), dv));

TaxonomyFacets.java

@@ -19,11 +19,13 @@ package org.apache.lucene.facet.taxonomy;
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.Comparator;
 import java.util.List;
 import java.util.Locale;
 import org.apache.lucene.facet.FacetResult;
 import org.apache.lucene.facet.Facets;
+import org.apache.lucene.facet.FacetsCollector;
 import org.apache.lucene.facet.FacetsConfig;
 import org.apache.lucene.facet.FacetsConfig.DimConfig;
@@ -53,6 +55,9 @@ abstract class TaxonomyFacets extends Facets {
   /** {@code FacetsConfig} provided to the constructor. */
   final FacetsConfig config;
+  /** {@code FacetsCollector} provided to the constructor. */
+  final FacetsCollector fc;
   /** Maps parent ordinal to its child, or -1 if the parent is childless. */
   private int[] children;
@@ -63,11 +68,13 @@ abstract class TaxonomyFacets extends Facets {
   final int[] parents;
   /** Sole constructor. */
-  TaxonomyFacets(String indexFieldName, TaxonomyReader taxoReader, FacetsConfig config)
+  TaxonomyFacets(
+      String indexFieldName, TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc)
       throws IOException {
     this.indexFieldName = indexFieldName;
     this.taxoReader = taxoReader;
     this.config = config;
+    this.fc = fc;
     parents = taxoReader.getParallelTaxonomyArrays().parents();
   }
@@ -138,6 +145,11 @@ abstract class TaxonomyFacets extends Facets {
   @Override
   public List<FacetResult> getAllDims(int topN) throws IOException {
     validateTopN(topN);
+    if (hasValues() == false) {
+      return Collections.emptyList();
+    }
     int[] children = getChildren();
     int[] siblings = getSiblings();
     int ord = children[TaxonomyReader.ROOT_ORDINAL];
@@ -158,4 +170,7 @@ abstract class TaxonomyFacets extends Facets {
     results.sort(BY_VALUE_THEN_DIM);
     return results;
   }
+  /** Were any values actually aggregated during counting? */
+  abstract boolean hasValues();
 }
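None of the above changes the public faceting API; it only changes when the internal counters are allocated. For reference, a hedged end-to-end usage sketch of the caller-side flow, which is unaffected by this commit (the demo class name, the toy "Author" dimension, and the single indexed document are illustrative assumptions, not part of the change):

import org.apache.lucene.document.Document;
import org.apache.lucene.facet.FacetField;
import org.apache.lucene.facet.FacetResult;
import org.apache.lucene.facet.Facets;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.taxonomy.FastTaxonomyFacetCounts;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;

public class FacetLazyInitDemo {
  public static void main(String[] args) throws Exception {
    Directory indexDir = new ByteBuffersDirectory();
    Directory taxoDir = new ByteBuffersDirectory();
    FacetsConfig config = new FacetsConfig();

    // Index one document with a taxonomy facet field.
    try (IndexWriter writer = new IndexWriter(indexDir, new IndexWriterConfig());
        DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir)) {
      Document doc = new Document();
      doc.add(new FacetField("Author", "Lisa"));
      writer.addDocument(config.build(taxoWriter, doc));
    }

    // Search and count facets; counters inside FastTaxonomyFacetCounts are now
    // only allocated for segments that actually contain hits.
    try (DirectoryReader reader = DirectoryReader.open(indexDir);
        TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir)) {
      IndexSearcher searcher = new IndexSearcher(reader);
      FacetsCollector fc = new FacetsCollector();
      FacetsCollector.search(searcher, new MatchAllDocsQuery(), 10, fc);
      Facets facets = new FastTaxonomyFacetCounts(taxoReader, config, fc);
      FacetResult result = facets.getTopChildren(10, "Author");
      System.out.println(result); // may be null if nothing was counted for the dimension
    }
  }
}

If a query matches no documents, the new code paths above simply skip counter allocation, and accessors such as getTopChildren return null (or an empty list from getAllDims) rather than iterating over freshly allocated, all-zero counters.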