mirror of https://github.com/apache/lucene.git
Initialize facet counting data structures lazily (#12408)
This change covers:
* Taxonomy faceting
  * FastTaxonomyFacetCounts
  * TaxonomyFacetIntAssociations
  * TaxonomyFacetFloatAssociations
* SSDV faceting
  * SortedSetDocValuesFacetCounts
  * ConcurrentSortedSetDocValuesFacetCounts
  * StringValueFacetCounts
* Range faceting:
  * LongRangeFacetCounts
  * DoubleRangeFacetCounts
* Long faceting:
  * LongValueFacetCounts

Left for a future iteration:
* RangeOnRange faceting
* FacetSet faceting
This commit is contained in:
parent
2b3b028734
commit
179b45bc23
|
@ -161,6 +161,9 @@ Optimizations
|
|||
|
||||
* GITHUB#12453: Faster bulk numeric reads from BufferedIndexInput (Armin Braun)
|
||||
|
||||
* GITHUB#12408: Lazy initialization improvements for Facets implementations when there are segments with no hits
|
||||
to count. (Greg Miller)
|
||||
|
||||
Bug Fixes
|
||||
---------------------
|
||||
|
||||
|
|
|
@ -51,10 +51,13 @@ import org.apache.lucene.util.PriorityQueue;
|
|||
public class LongValueFacetCounts extends Facets {
|
||||
|
||||
/** Used for all values that are < 1K. */
|
||||
private final int[] counts = new int[1024];
|
||||
private int[] counts;
|
||||
|
||||
/** Used for all values that are >= 1K. */
|
||||
private final LongIntHashMap hashCounts = new LongIntHashMap();
|
||||
private LongIntHashMap hashCounts;
|
||||
|
||||
/** Whether-or-not counters have been initialized. */
|
||||
private boolean initialized;
|
||||
|
||||
/** Field being counted. */
|
||||
private final String field;
|
||||
|
@ -125,6 +128,7 @@ public class LongValueFacetCounts extends Facets {
|
|||
public LongValueFacetCounts(String field, LongValuesSource valueSource, IndexReader reader)
|
||||
throws IOException {
|
||||
this.field = field;
|
||||
initializeCounters();
|
||||
if (valueSource != null) {
|
||||
countAll(reader, valueSource);
|
||||
} else {
|
||||
|
@ -141,6 +145,7 @@ public class LongValueFacetCounts extends Facets {
|
|||
public LongValueFacetCounts(String field, MultiLongValuesSource valuesSource, IndexReader reader)
|
||||
throws IOException {
|
||||
this.field = field;
|
||||
initializeCounters();
|
||||
if (valuesSource != null) {
|
||||
LongValuesSource singleValued = MultiLongValuesSource.unwrapSingleton(valuesSource);
|
||||
if (singleValued != null) {
|
||||
|
@ -153,11 +158,25 @@ public class LongValueFacetCounts extends Facets {
|
|||
}
|
||||
}
|
||||
|
||||
private void initializeCounters() {
|
||||
if (initialized) {
|
||||
return;
|
||||
}
|
||||
assert counts == null && hashCounts == null;
|
||||
initialized = true;
|
||||
counts = new int[1024];
|
||||
hashCounts = new LongIntHashMap();
|
||||
}
|
||||
|
||||
/** Counts from the provided valueSource. */
|
||||
private void count(LongValuesSource valueSource, List<MatchingDocs> matchingDocs)
|
||||
throws IOException {
|
||||
|
||||
for (MatchingDocs hits : matchingDocs) {
|
||||
if (hits.totalHits == 0) {
|
||||
continue;
|
||||
}
|
||||
initializeCounters();
|
||||
|
||||
LongValues fv = valueSource.getValues(hits.context, null);
|
||||
|
||||
|
@ -183,6 +202,10 @@ public class LongValueFacetCounts extends Facets {
|
|||
private void count(MultiLongValuesSource valuesSource, List<MatchingDocs> matchingDocs)
|
||||
throws IOException {
|
||||
for (MatchingDocs hits : matchingDocs) {
|
||||
if (hits.totalHits == 0) {
|
||||
continue;
|
||||
}
|
||||
initializeCounters();
|
||||
|
||||
MultiLongValues multiValues = valuesSource.getValues(hits.context);
|
||||
|
||||
|
@ -213,6 +236,10 @@ public class LongValueFacetCounts extends Facets {
|
|||
/** Counts from the field's indexed doc values. */
|
||||
private void count(String field, List<MatchingDocs> matchingDocs) throws IOException {
|
||||
for (MatchingDocs hits : matchingDocs) {
|
||||
if (hits.totalHits == 0) {
|
||||
continue;
|
||||
}
|
||||
initializeCounters();
|
||||
|
||||
SortedNumericDocValues multiValues = DocValues.getSortedNumeric(hits.context.reader(), field);
|
||||
NumericDocValues singleValues = DocValues.unwrapSingleton(multiValues);
|
||||
|
@ -350,6 +377,13 @@ public class LongValueFacetCounts extends Facets {
|
|||
@Override
|
||||
public FacetResult getAllChildren(String dim, String... path) throws IOException {
|
||||
validateDimAndPathForGetChildren(dim, path);
|
||||
|
||||
if (initialized == false) {
|
||||
// nothing was counted (either no hits or no values for all hits):
|
||||
assert totCount == 0;
|
||||
return new FacetResult(field, new String[0], totCount, new LabelAndValue[0], 0);
|
||||
}
|
||||
|
||||
List<LabelAndValue> labelValues = new ArrayList<>();
|
||||
for (int i = 0; i < counts.length; i++) {
|
||||
if (counts[i] != 0) {
|
||||
|
@ -378,6 +412,12 @@ public class LongValueFacetCounts extends Facets {
|
|||
validateTopN(topN);
|
||||
validateDimAndPathForGetChildren(dim, path);
|
||||
|
||||
if (initialized == false) {
|
||||
// nothing was counted (either no hits or no values for all hits):
|
||||
assert totCount == 0;
|
||||
return new FacetResult(field, new String[0], totCount, new LabelAndValue[0], 0);
|
||||
}
|
||||
|
||||
PriorityQueue<Entry> pq =
|
||||
new PriorityQueue<>(Math.min(topN, counts.length + hashCounts.size())) {
|
||||
@Override
|
||||
|
@ -440,6 +480,12 @@ public class LongValueFacetCounts extends Facets {
|
|||
* efficient to use {@link #getAllChildren(String, String...)}.
|
||||
*/
|
||||
public FacetResult getAllChildrenSortByValue() {
|
||||
if (initialized == false) {
|
||||
// nothing was counted (either no hits or no values for all hits):
|
||||
assert totCount == 0;
|
||||
return new FacetResult(field, new String[0], totCount, new LabelAndValue[0], 0);
|
||||
}
|
||||
|
||||
List<LabelAndValue> labelValues = new ArrayList<>();
|
||||
|
||||
// compact & sort hash table's arrays by value
|
||||
|
@ -533,27 +579,29 @@ public class LongValueFacetCounts extends Facets {
|
|||
StringBuilder b = new StringBuilder();
|
||||
b.append("LongValueFacetCounts totCount=");
|
||||
b.append(totCount);
|
||||
b.append(":\n");
|
||||
for (int i = 0; i < counts.length; i++) {
|
||||
if (counts[i] != 0) {
|
||||
b.append(" ");
|
||||
b.append(i);
|
||||
b.append(" -> count=");
|
||||
b.append(counts[i]);
|
||||
b.append('\n');
|
||||
}
|
||||
}
|
||||
|
||||
if (hashCounts.size() != 0) {
|
||||
for (LongIntCursor c : hashCounts) {
|
||||
if (c.value != 0) {
|
||||
if (initialized) {
|
||||
b.append(":\n");
|
||||
for (int i = 0; i < counts.length; i++) {
|
||||
if (counts[i] != 0) {
|
||||
b.append(" ");
|
||||
b.append(c.key);
|
||||
b.append(i);
|
||||
b.append(" -> count=");
|
||||
b.append(c.value);
|
||||
b.append(counts[i]);
|
||||
b.append('\n');
|
||||
}
|
||||
}
|
||||
|
||||
if (hashCounts.size() != 0) {
|
||||
for (LongIntCursor c : hashCounts) {
|
||||
if (c.value != 0) {
|
||||
b.append(" ");
|
||||
b.append(c.key);
|
||||
b.append(" -> count=");
|
||||
b.append(c.value);
|
||||
b.append('\n');
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return b.toString();
|
||||
|
|
|
@ -69,8 +69,9 @@ public class StringValueFacetCounts extends Facets {
|
|||
private final OrdinalMap ordinalMap;
|
||||
private final SortedSetDocValues docValues;
|
||||
|
||||
private final int[] denseCounts;
|
||||
private int[] denseCounts;
|
||||
private final IntIntHashMap sparseCounts;
|
||||
private boolean initialized;
|
||||
|
||||
private final int cardinality;
|
||||
private int totalDocCount;
|
||||
|
@ -101,7 +102,9 @@ public class StringValueFacetCounts extends Facets {
|
|||
if (facetsCollector != null) {
|
||||
if (cardinality < 1024) { // count densely for low cardinality
|
||||
sparseCounts = null;
|
||||
denseCounts = new int[cardinality];
|
||||
denseCounts = null;
|
||||
initialized = false;
|
||||
count(facetsCollector);
|
||||
} else {
|
||||
int totalHits = 0;
|
||||
int totalDocs = 0;
|
||||
|
@ -110,22 +113,31 @@ public class StringValueFacetCounts extends Facets {
|
|||
totalDocs += matchingDocs.context.reader().maxDoc();
|
||||
}
|
||||
|
||||
// If our result set is < 10% of the index, we collect sparsely (use hash map). This
|
||||
// heuristic is borrowed from IntTaxonomyFacetCounts:
|
||||
if (totalHits < totalDocs / 10) {
|
||||
sparseCounts = new IntIntHashMap();
|
||||
denseCounts = null;
|
||||
} else {
|
||||
// No counting needed if there are no hits:
|
||||
if (totalHits == 0) {
|
||||
sparseCounts = null;
|
||||
denseCounts = new int[cardinality];
|
||||
denseCounts = null;
|
||||
initialized = true;
|
||||
} else {
|
||||
// If our result set is < 10% of the index, we collect sparsely (use hash map). This
|
||||
// heuristic is borrowed from IntTaxonomyFacetCounts:
|
||||
if (totalHits < totalDocs / 10) {
|
||||
sparseCounts = new IntIntHashMap();
|
||||
denseCounts = null;
|
||||
initialized = true;
|
||||
} else {
|
||||
sparseCounts = null;
|
||||
denseCounts = new int[cardinality];
|
||||
initialized = true;
|
||||
}
|
||||
count(facetsCollector);
|
||||
}
|
||||
}
|
||||
|
||||
count(facetsCollector);
|
||||
} else {
|
||||
// Since we're counting all ordinals, count densely:
|
||||
sparseCounts = null;
|
||||
denseCounts = new int[cardinality];
|
||||
initialized = true;
|
||||
|
||||
countAll();
|
||||
}
|
||||
|
@ -294,6 +306,9 @@ public class StringValueFacetCounts extends Facets {
|
|||
if (matchingDocs.size() == 1) {
|
||||
|
||||
FacetsCollector.MatchingDocs hits = matchingDocs.get(0);
|
||||
if (hits.totalHits == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Validate state before doing anything else:
|
||||
validateState(hits.context);
|
||||
|
@ -314,6 +329,10 @@ public class StringValueFacetCounts extends Facets {
|
|||
assert ordinalMap != null;
|
||||
assert docValues instanceof MultiDocValues.MultiSortedSetDocValues;
|
||||
|
||||
if (hits.totalHits == 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
MultiDocValues.MultiSortedSetDocValues multiValues =
|
||||
(MultiDocValues.MultiSortedSetDocValues) docValues;
|
||||
|
||||
|
@ -368,6 +387,13 @@ public class StringValueFacetCounts extends Facets {
|
|||
FacetsCollector.MatchingDocs hits,
|
||||
Bits liveDocs)
|
||||
throws IOException {
|
||||
if (initialized == false) {
|
||||
assert denseCounts == null && sparseCounts == null;
|
||||
// If the counters weren't initialized, we can assume the cardinality is low enough that
|
||||
// dense counting will be preferable:
|
||||
denseCounts = new int[cardinality];
|
||||
initialized = true;
|
||||
}
|
||||
|
||||
// It's slightly more efficient to work against SortedDocValues if the field is actually
|
||||
// single-valued (see: LUCENE-5309)
|
||||
|
|
|
@ -157,20 +157,25 @@ public class DoubleRangeFacetCounts extends RangeFacetCounts {
|
|||
private void count(DoubleValuesSource valueSource, List<MatchingDocs> matchingDocs)
|
||||
throws IOException {
|
||||
|
||||
LongRange[] longRanges = getLongRanges();
|
||||
|
||||
LongRangeCounter counter = LongRangeCounter.create(longRanges, counts);
|
||||
|
||||
LongRangeCounter counter = null;
|
||||
int missingCount = 0;
|
||||
for (MatchingDocs hits : matchingDocs) {
|
||||
DoubleValues fv = valueSource.getValues(hits.context, null);
|
||||
totCount += hits.totalHits;
|
||||
if (hits.totalHits == 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
final DocIdSetIterator it = createIterator(hits);
|
||||
if (it == null) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (counter == null) {
|
||||
counter = setupCounter();
|
||||
}
|
||||
|
||||
DoubleValues fv = valueSource.getValues(hits.context, null);
|
||||
totCount += hits.totalHits;
|
||||
|
||||
for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; ) {
|
||||
// Skip missing docs:
|
||||
if (fv.advanceExact(doc)) {
|
||||
|
@ -183,27 +188,34 @@ public class DoubleRangeFacetCounts extends RangeFacetCounts {
|
|||
}
|
||||
}
|
||||
|
||||
missingCount += counter.finish();
|
||||
totCount -= missingCount;
|
||||
if (counter != null) {
|
||||
missingCount += counter.finish();
|
||||
totCount -= missingCount;
|
||||
}
|
||||
}
|
||||
|
||||
/** Counts from the provided valueSource. */
|
||||
private void count(MultiDoubleValuesSource valueSource, List<MatchingDocs> matchingDocs)
|
||||
throws IOException {
|
||||
|
||||
LongRange[] longRanges = getLongRanges();
|
||||
|
||||
LongRangeCounter counter = LongRangeCounter.create(longRanges, counts);
|
||||
|
||||
LongRangeCounter counter = null; // LongRangeCounter.create(longRanges, counts);
|
||||
int missingCount = 0;
|
||||
for (MatchingDocs hits : matchingDocs) {
|
||||
MultiDoubleValues multiValues = valueSource.getValues(hits.context);
|
||||
if (hits.totalHits == 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
final DocIdSetIterator it = createIterator(hits);
|
||||
if (it == null) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (counter == null) {
|
||||
counter = setupCounter();
|
||||
}
|
||||
|
||||
MultiDoubleValues multiValues = valueSource.getValues(hits.context);
|
||||
|
||||
for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; ) {
|
||||
// Skip missing docs:
|
||||
if (multiValues.advanceExact(doc)) {
|
||||
|
@ -232,8 +244,10 @@ public class DoubleRangeFacetCounts extends RangeFacetCounts {
|
|||
}
|
||||
}
|
||||
|
||||
missingCount += counter.finish();
|
||||
totCount -= missingCount;
|
||||
if (counter != null) {
|
||||
missingCount += counter.finish();
|
||||
totCount -= missingCount;
|
||||
}
|
||||
}
|
||||
|
||||
/** Create long ranges from the double ranges. */
|
||||
|
|
|
@ -128,21 +128,27 @@ public class LongRangeFacetCounts extends RangeFacetCounts {
|
|||
private void count(LongValuesSource valueSource, List<MatchingDocs> matchingDocs)
|
||||
throws IOException {
|
||||
|
||||
LongRange[] ranges = getLongRanges();
|
||||
|
||||
LongRangeCounter counter = LongRangeCounter.create(ranges, counts);
|
||||
LongRangeCounter counter = null;
|
||||
|
||||
int missingCount = 0;
|
||||
|
||||
for (MatchingDocs hits : matchingDocs) {
|
||||
LongValues fv = valueSource.getValues(hits.context, null);
|
||||
totCount += hits.totalHits;
|
||||
if (hits.totalHits == 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
final DocIdSetIterator it = createIterator(hits);
|
||||
if (it == null) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (counter == null) {
|
||||
counter = setupCounter();
|
||||
}
|
||||
|
||||
LongValues fv = valueSource.getValues(hits.context, null);
|
||||
totCount += hits.totalHits;
|
||||
|
||||
for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; ) {
|
||||
// Skip missing docs:
|
||||
if (fv.advanceExact(doc)) {
|
||||
|
@ -155,26 +161,34 @@ public class LongRangeFacetCounts extends RangeFacetCounts {
|
|||
}
|
||||
}
|
||||
|
||||
missingCount += counter.finish();
|
||||
totCount -= missingCount;
|
||||
if (counter != null) {
|
||||
missingCount += counter.finish();
|
||||
totCount -= missingCount;
|
||||
}
|
||||
}
|
||||
|
||||
/** Counts from the provided valueSource. */
|
||||
private void count(MultiLongValuesSource valueSource, List<MatchingDocs> matchingDocs)
|
||||
throws IOException {
|
||||
|
||||
LongRange[] ranges = getLongRanges();
|
||||
|
||||
LongRangeCounter counter = LongRangeCounter.create(ranges, counts);
|
||||
LongRangeCounter counter = null;
|
||||
|
||||
for (MatchingDocs hits : matchingDocs) {
|
||||
MultiLongValues multiValues = valueSource.getValues(hits.context);
|
||||
if (hits.totalHits == 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
final DocIdSetIterator it = createIterator(hits);
|
||||
if (it == null) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (counter == null) {
|
||||
counter = setupCounter();
|
||||
}
|
||||
|
||||
MultiLongValues multiValues = valueSource.getValues(hits.context);
|
||||
|
||||
for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; ) {
|
||||
// Skip missing docs:
|
||||
if (multiValues.advanceExact(doc)) {
|
||||
|
@ -203,8 +217,10 @@ public class LongRangeFacetCounts extends RangeFacetCounts {
|
|||
}
|
||||
}
|
||||
|
||||
int missingCount = counter.finish();
|
||||
totCount -= missingCount;
|
||||
if (counter != null) {
|
||||
int missingCount = counter.finish();
|
||||
totCount -= missingCount;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -39,8 +39,8 @@ abstract class RangeFacetCounts extends FacetCountsWithFilterQuery {
|
|||
/** Ranges passed to constructor. */
|
||||
protected final Range[] ranges;
|
||||
|
||||
/** Counts, initialized in by subclass. */
|
||||
protected final int[] counts;
|
||||
/** Counts. */
|
||||
protected int[] counts;
|
||||
|
||||
/** Our field name. */
|
||||
protected final String field;
|
||||
|
@ -53,7 +53,6 @@ abstract class RangeFacetCounts extends FacetCountsWithFilterQuery {
|
|||
super(fastMatchQuery);
|
||||
this.field = field;
|
||||
this.ranges = ranges;
|
||||
counts = new int[ranges.length];
|
||||
}
|
||||
|
||||
protected abstract LongRange[] getLongRanges();
|
||||
|
@ -62,6 +61,12 @@ abstract class RangeFacetCounts extends FacetCountsWithFilterQuery {
|
|||
return l;
|
||||
}
|
||||
|
||||
protected LongRangeCounter setupCounter() {
|
||||
assert counts == null;
|
||||
counts = new int[ranges.length];
|
||||
return LongRangeCounter.create(getLongRanges(), counts);
|
||||
}
|
||||
|
||||
/** Counts from the provided field. */
|
||||
protected void count(String field, List<FacetsCollector.MatchingDocs> matchingDocs)
|
||||
throws IOException {
|
||||
|
@ -69,15 +74,20 @@ abstract class RangeFacetCounts extends FacetCountsWithFilterQuery {
|
|||
// load doc values for all segments up front and keep track of whether-or-not we found any that
|
||||
// were actually multi-valued. this allows us to optimize the case where all segments contain
|
||||
// single-values.
|
||||
SortedNumericDocValues[] multiValuedDocVals = new SortedNumericDocValues[matchingDocs.size()];
|
||||
SortedNumericDocValues[] multiValuedDocVals = null;
|
||||
NumericDocValues[] singleValuedDocVals = null;
|
||||
boolean foundMultiValued = false;
|
||||
|
||||
for (int i = 0; i < matchingDocs.size(); i++) {
|
||||
|
||||
FacetsCollector.MatchingDocs hits = matchingDocs.get(i);
|
||||
if (hits.totalHits == 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
SortedNumericDocValues multiValues = DocValues.getSortedNumeric(hits.context.reader(), field);
|
||||
if (multiValuedDocVals == null) {
|
||||
multiValuedDocVals = new SortedNumericDocValues[matchingDocs.size()];
|
||||
}
|
||||
multiValuedDocVals[i] = multiValues;
|
||||
|
||||
// only bother trying to unwrap a singleton if we haven't yet seen any true multi-valued cases
|
||||
|
@ -94,6 +104,11 @@ abstract class RangeFacetCounts extends FacetCountsWithFilterQuery {
|
|||
}
|
||||
}
|
||||
|
||||
if (multiValuedDocVals == null) {
|
||||
// no hits or no doc values in all segments. nothing to count:
|
||||
return;
|
||||
}
|
||||
|
||||
// we only need to keep around one or the other at this point
|
||||
if (foundMultiValued) {
|
||||
singleValuedDocVals = null;
|
||||
|
@ -101,7 +116,7 @@ abstract class RangeFacetCounts extends FacetCountsWithFilterQuery {
|
|||
multiValuedDocVals = null;
|
||||
}
|
||||
|
||||
LongRangeCounter counter = LongRangeCounter.create(getLongRanges(), counts);
|
||||
LongRangeCounter counter = setupCounter();
|
||||
|
||||
int missingCount = 0;
|
||||
|
||||
|
@ -183,9 +198,15 @@ abstract class RangeFacetCounts extends FacetCountsWithFilterQuery {
|
|||
@Override
|
||||
public FacetResult getAllChildren(String dim, String... path) throws IOException {
|
||||
validateDimAndPathForGetChildren(dim, path);
|
||||
LabelAndValue[] labelValues = new LabelAndValue[counts.length];
|
||||
for (int i = 0; i < counts.length; i++) {
|
||||
labelValues[i] = new LabelAndValue(ranges[i].label, counts[i]);
|
||||
LabelAndValue[] labelValues = new LabelAndValue[ranges.length];
|
||||
if (counts == null) {
|
||||
for (int i = 0; i < ranges.length; i++) {
|
||||
labelValues[i] = new LabelAndValue(ranges[i].label, 0);
|
||||
}
|
||||
} else {
|
||||
for (int i = 0; i < ranges.length; i++) {
|
||||
labelValues[i] = new LabelAndValue(ranges[i].label, counts[i]);
|
||||
}
|
||||
}
|
||||
return new FacetResult(dim, path, totCount, labelValues, labelValues.length);
|
||||
}
|
||||
|
@ -195,6 +216,11 @@ abstract class RangeFacetCounts extends FacetCountsWithFilterQuery {
|
|||
validateTopN(topN);
|
||||
validateDimAndPathForGetChildren(dim, path);
|
||||
|
||||
if (counts == null) {
|
||||
assert totCount == 0;
|
||||
return new FacetResult(dim, path, totCount, new LabelAndValue[0], 0);
|
||||
}
|
||||
|
||||
PriorityQueue<Entry> pq =
|
||||
new PriorityQueue<>(Math.min(topN, counts.length)) {
|
||||
@Override
|
||||
|
@ -251,7 +277,7 @@ abstract class RangeFacetCounts extends FacetCountsWithFilterQuery {
|
|||
b.append(" ");
|
||||
b.append(ranges[i].label);
|
||||
b.append(" -> count=");
|
||||
b.append(counts[i]);
|
||||
b.append(counts != null ? counts[i] : 0);
|
||||
b.append('\n');
|
||||
}
|
||||
return b.toString();
|
||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.lucene.facet.sortedset;
|
|||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
|
@ -68,6 +69,9 @@ abstract class AbstractSortedSetDocValueFacetCounts extends Facets {
|
|||
@Override
|
||||
public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException {
|
||||
validateTopN(topN);
|
||||
if (hasCounts() == false) {
|
||||
return null;
|
||||
}
|
||||
TopChildrenForPath topChildrenForPath = getTopChildrenForPath(topN, dim, path);
|
||||
return createFacetResult(topChildrenForPath, dim, path);
|
||||
}
|
||||
|
@ -80,6 +84,10 @@ abstract class AbstractSortedSetDocValueFacetCounts extends Facets {
|
|||
return null;
|
||||
}
|
||||
|
||||
if (hasCounts() == false) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// Compute the actual results:
|
||||
int pathCount = 0;
|
||||
List<LabelAndValue> labelValues = new ArrayList<>();
|
||||
|
@ -111,12 +119,17 @@ abstract class AbstractSortedSetDocValueFacetCounts extends Facets {
|
|||
return -1;
|
||||
}
|
||||
|
||||
return getCount(ord);
|
||||
return hasCounts() == false ? 0 : getCount(ord);
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<FacetResult> getAllDims(int topN) throws IOException {
|
||||
validateTopN(topN);
|
||||
|
||||
if (hasCounts() == false) {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
|
||||
List<FacetResult> results = new ArrayList<>();
|
||||
for (String dim : state.getDims()) {
|
||||
TopChildrenForPath topChildrenForPath = getTopChildrenForPath(topN, dim);
|
||||
|
@ -136,6 +149,10 @@ abstract class AbstractSortedSetDocValueFacetCounts extends Facets {
|
|||
validateTopN(topNDims);
|
||||
validateTopN(topNChildren);
|
||||
|
||||
if (hasCounts() == false) {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
|
||||
// Creates priority queue to store top dimensions and sort by their aggregated values/hits and
|
||||
// string values.
|
||||
PriorityQueue<DimValue> pq =
|
||||
|
@ -230,6 +247,9 @@ abstract class AbstractSortedSetDocValueFacetCounts extends Facets {
|
|||
return Arrays.asList(results);
|
||||
}
|
||||
|
||||
/** Were any counts actually computed? (They may not be if there are no hits, etc.) */
|
||||
abstract boolean hasCounts();
|
||||
|
||||
/** Retrieve the count for a specified ordinal. */
|
||||
abstract int getCount(int ord);
|
||||
|
||||
|
|
|
@ -77,6 +77,13 @@ public class ConcurrentSortedSetDocValuesFacetCounts extends AbstractSortedSetDo
|
|||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
boolean hasCounts() {
|
||||
// TODO: safe to always assume there are counts, but maybe it would be more optimal to
|
||||
// actually track if we see a count?
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
int getCount(int ord) {
|
||||
return counts.get(ord);
|
||||
|
@ -99,6 +106,11 @@ public class ConcurrentSortedSetDocValuesFacetCounts extends AbstractSortedSetDo
|
|||
|
||||
@Override
|
||||
public Void call() throws IOException {
|
||||
// If we're counting collected hits but there were none, short-circuit:
|
||||
if (hits != null && hits.totalHits == 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
SortedSetDocValues multiValues = DocValues.getSortedSet(leafReader, field);
|
||||
if (multiValues == null) {
|
||||
// nothing to count here
|
||||
|
|
|
@ -56,7 +56,8 @@ import org.apache.lucene.util.LongValues;
|
|||
* @lucene.experimental
|
||||
*/
|
||||
public class SortedSetDocValuesFacetCounts extends AbstractSortedSetDocValueFacetCounts {
|
||||
final int[] counts;
|
||||
private final SortedSetDocValuesReaderState state;
|
||||
int[] counts;
|
||||
|
||||
/** Returns all facet counts, same result as searching on {@link MatchAllDocsQuery} but faster. */
|
||||
public SortedSetDocValuesFacetCounts(SortedSetDocValuesReaderState state) throws IOException {
|
||||
|
@ -67,7 +68,7 @@ public class SortedSetDocValuesFacetCounts extends AbstractSortedSetDocValueFace
|
|||
public SortedSetDocValuesFacetCounts(SortedSetDocValuesReaderState state, FacetsCollector hits)
|
||||
throws IOException {
|
||||
super(state);
|
||||
this.counts = new int[state.getSize()];
|
||||
this.state = state;
|
||||
if (hits == null) {
|
||||
// browse only
|
||||
countAll();
|
||||
|
@ -76,6 +77,17 @@ public class SortedSetDocValuesFacetCounts extends AbstractSortedSetDocValueFace
|
|||
}
|
||||
}
|
||||
|
||||
private void initializeCounts() {
|
||||
if (counts == null) {
|
||||
counts = new int[state.getSize()];
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
boolean hasCounts() {
|
||||
return counts != null;
|
||||
}
|
||||
|
||||
@Override
|
||||
int getCount(int ord) {
|
||||
return counts[ord];
|
||||
|
@ -90,6 +102,9 @@ public class SortedSetDocValuesFacetCounts extends AbstractSortedSetDocValueFace
|
|||
return;
|
||||
}
|
||||
|
||||
// Initialize counts:
|
||||
initializeCounts();
|
||||
|
||||
// It's slightly more efficient to work against SortedDocValues if the field is actually
|
||||
// single-valued (see: LUCENE-5309)
|
||||
SortedDocValues singleValues = DocValues.unwrapSingleton(multiValues);
|
||||
|
@ -159,12 +174,19 @@ public class SortedSetDocValuesFacetCounts extends AbstractSortedSetDocValueFace
|
|||
private void countOneSegment(
|
||||
OrdinalMap ordinalMap, LeafReader reader, int segOrd, MatchingDocs hits, Bits liveDocs)
|
||||
throws IOException {
|
||||
if (hits != null && hits.totalHits == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
SortedSetDocValues multiValues = DocValues.getSortedSet(reader, field);
|
||||
if (multiValues == null) {
|
||||
// nothing to count
|
||||
return;
|
||||
}
|
||||
|
||||
// Initialize counts:
|
||||
initializeCounts();
|
||||
|
||||
// It's slightly more efficient to work against SortedDocValues if the field is actually
|
||||
// single-valued (see: LUCENE-5309)
|
||||
SortedDocValues singleValues = DocValues.unwrapSingleton(multiValues);
|
||||
|
|
|
@ -71,11 +71,15 @@ public class FastTaxonomyFacetCounts extends IntTaxonomyFacets {
|
|||
|
||||
private void count(List<MatchingDocs> matchingDocs) throws IOException {
|
||||
for (MatchingDocs hits : matchingDocs) {
|
||||
if (hits.totalHits == 0) {
|
||||
continue;
|
||||
}
|
||||
SortedNumericDocValues multiValued =
|
||||
hits.context.reader().getSortedNumericDocValues(indexFieldName);
|
||||
if (multiValued == null) {
|
||||
continue;
|
||||
}
|
||||
initializeValueCounters();
|
||||
|
||||
NumericDocValues singleValued = DocValues.unwrapSingleton(multiValued);
|
||||
|
||||
|
@ -114,13 +118,14 @@ public class FastTaxonomyFacetCounts extends IntTaxonomyFacets {
|
|||
}
|
||||
|
||||
private void countAll(IndexReader reader) throws IOException {
|
||||
assert values != null;
|
||||
for (LeafReaderContext context : reader.leaves()) {
|
||||
SortedNumericDocValues multiValued =
|
||||
context.reader().getSortedNumericDocValues(indexFieldName);
|
||||
if (multiValued == null) {
|
||||
continue;
|
||||
}
|
||||
initializeValueCounters();
|
||||
assert values != null;
|
||||
|
||||
Bits liveDocs = context.reader().getLiveDocs();
|
||||
|
||||
|
|
|
@ -20,10 +20,12 @@ import com.carrotsearch.hppc.FloatArrayList;
|
|||
import com.carrotsearch.hppc.IntArrayList;
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import org.apache.lucene.facet.FacetResult;
|
||||
import org.apache.lucene.facet.FacetsCollector;
|
||||
import org.apache.lucene.facet.FacetsConfig;
|
||||
import org.apache.lucene.facet.FacetsConfig.DimConfig;
|
||||
import org.apache.lucene.facet.LabelAndValue;
|
||||
|
@ -39,22 +41,37 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets {
|
|||
final AssociationAggregationFunction aggregationFunction;
|
||||
|
||||
/** Per-ordinal value. */
|
||||
final float[] values;
|
||||
float[] values;
|
||||
|
||||
/** Sole constructor. */
|
||||
FloatTaxonomyFacets(
|
||||
String indexFieldName,
|
||||
TaxonomyReader taxoReader,
|
||||
AssociationAggregationFunction aggregationFunction,
|
||||
FacetsConfig config)
|
||||
FacetsConfig config,
|
||||
FacetsCollector fc)
|
||||
throws IOException {
|
||||
super(indexFieldName, taxoReader, config);
|
||||
super(indexFieldName, taxoReader, config, fc);
|
||||
this.aggregationFunction = aggregationFunction;
|
||||
values = new float[taxoReader.getSize()];
|
||||
}
|
||||
|
||||
@Override
|
||||
boolean hasValues() {
|
||||
return values != null;
|
||||
}
|
||||
|
||||
void initializeValueCounters() {
|
||||
if (values == null) {
|
||||
values = new float[taxoReader.getSize()];
|
||||
}
|
||||
}
|
||||
|
||||
/** Rolls up any single-valued hierarchical dimensions. */
|
||||
void rollup() throws IOException {
|
||||
if (values == null) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Rollup any necessary dims:
|
||||
int[] children = getChildren();
|
||||
for (Map.Entry<String, DimConfig> ent : config.getDimConfigs().entrySet()) {
|
||||
|
@ -100,7 +117,7 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets {
|
|||
if (ord < 0) {
|
||||
return -1;
|
||||
}
|
||||
return values[ord];
|
||||
return values == null ? 0 : values[ord];
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -112,6 +129,10 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets {
|
|||
return null;
|
||||
}
|
||||
|
||||
if (values == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
int[] children = getChildren();
|
||||
int[] siblings = getSiblings();
|
||||
|
||||
|
@ -166,6 +187,10 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets {
|
|||
return null;
|
||||
}
|
||||
|
||||
if (values == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
TopChildrenForPath topChildrenForPath = getTopChildrenForPath(dimConfig, dimOrd, topN);
|
||||
return createFacetResult(topChildrenForPath, dim, path);
|
||||
}
|
||||
|
@ -264,6 +289,10 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets {
|
|||
validateTopN(topNDims);
|
||||
validateTopN(topNChildren);
|
||||
|
||||
if (values == null) {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
|
||||
// get existing children and siblings ordinal array from TaxonomyFacets
|
||||
int[] children = getChildren();
|
||||
int[] siblings = getSiblings();
|
||||
|
|
|
@ -21,6 +21,7 @@ import com.carrotsearch.hppc.IntIntHashMap;
|
|||
import com.carrotsearch.hppc.cursors.IntIntCursor;
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
@ -40,10 +41,13 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
|
|||
final AssociationAggregationFunction aggregationFunction;
|
||||
|
||||
/** Dense ordinal values. */
|
||||
final int[] values;
|
||||
int[] values;
|
||||
|
||||
/** Sparse ordinal values. */
|
||||
final IntIntHashMap sparseValues;
|
||||
IntIntHashMap sparseValues;
|
||||
|
||||
/** Have value counters been initialized. */
|
||||
boolean initialized;
|
||||
|
||||
/** Sole constructor. */
|
||||
IntTaxonomyFacets(
|
||||
|
@ -53,14 +57,24 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
|
|||
AssociationAggregationFunction aggregationFunction,
|
||||
FacetsCollector fc)
|
||||
throws IOException {
|
||||
super(indexFieldName, taxoReader, config);
|
||||
super(indexFieldName, taxoReader, config, fc);
|
||||
this.aggregationFunction = aggregationFunction;
|
||||
}
|
||||
|
||||
@Override
|
||||
boolean hasValues() {
|
||||
return initialized;
|
||||
}
|
||||
|
||||
void initializeValueCounters() {
|
||||
if (initialized) {
|
||||
return;
|
||||
}
|
||||
initialized = true;
|
||||
assert sparseValues == null && values == null;
|
||||
if (useHashTable(fc, taxoReader)) {
|
||||
sparseValues = new IntIntHashMap();
|
||||
values = null;
|
||||
} else {
|
||||
sparseValues = null;
|
||||
values = new int[taxoReader.getSize()];
|
||||
}
|
||||
}
|
||||
|
@ -85,6 +99,10 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
|
|||
|
||||
/** Rolls up any single-valued hierarchical dimensions. */
|
||||
void rollup() throws IOException {
|
||||
if (initialized == false) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Rollup any necessary dims:
|
||||
int[] children = null;
|
||||
for (Map.Entry<String, DimConfig> ent : config.getDimConfigs().entrySet()) {
|
||||
|
@ -161,7 +179,7 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
|
|||
if (ord < 0) {
|
||||
return -1;
|
||||
}
|
||||
return getValue(ord);
|
||||
return initialized ? getValue(ord) : 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -173,6 +191,10 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
|
|||
return null;
|
||||
}
|
||||
|
||||
if (initialized == false) {
|
||||
return null;
|
||||
}
|
||||
|
||||
int aggregatedValue = 0;
|
||||
|
||||
IntArrayList ordinals = new IntArrayList();
|
||||
|
@ -239,6 +261,10 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
|
|||
return null;
|
||||
}
|
||||
|
||||
if (initialized == false) {
|
||||
return null;
|
||||
}
|
||||
|
||||
TopChildrenForPath topChildrenForPath = getTopChildrenForPath(dimConfig, dimOrd, topN);
|
||||
return createFacetResult(topChildrenForPath, dim, path);
|
||||
}
|
||||
|
@ -324,6 +350,10 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
|
|||
throw new IllegalArgumentException("topN must be > 0");
|
||||
}
|
||||
|
||||
if (initialized == false) {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
|
||||
// get children and siblings ordinal array from TaxonomyFacets
|
||||
int[] children = getChildren();
|
||||
int[] siblings = getSiblings();
|
||||
|
|
|
@ -88,7 +88,7 @@ public class TaxonomyFacetFloatAssociations extends FloatTaxonomyFacets {
|
|||
FacetsCollector fc,
|
||||
AssociationAggregationFunction aggregationFunction)
|
||||
throws IOException {
|
||||
super(indexFieldName, taxoReader, aggregationFunction, config);
|
||||
super(indexFieldName, taxoReader, aggregationFunction, config, fc);
|
||||
aggregateValues(aggregationFunction, fc.getMatchingDocs());
|
||||
}
|
||||
|
||||
|
@ -104,7 +104,7 @@ public class TaxonomyFacetFloatAssociations extends FloatTaxonomyFacets {
|
|||
AssociationAggregationFunction aggregationFunction,
|
||||
DoubleValuesSource valuesSource)
|
||||
throws IOException {
|
||||
super(indexFieldName, taxoReader, aggregationFunction, config);
|
||||
super(indexFieldName, taxoReader, aggregationFunction, config, fc);
|
||||
aggregateValues(aggregationFunction, fc.getMatchingDocs(), fc.getKeepScores(), valuesSource);
|
||||
}
|
||||
|
||||
|
@ -134,6 +134,11 @@ public class TaxonomyFacetFloatAssociations extends FloatTaxonomyFacets {
|
|||
DoubleValuesSource valueSource)
|
||||
throws IOException {
|
||||
for (MatchingDocs hits : matchingDocs) {
|
||||
if (hits.totalHits == 0) {
|
||||
continue;
|
||||
}
|
||||
initializeValueCounters();
|
||||
|
||||
SortedNumericDocValues ordinalValues =
|
||||
DocValues.getSortedNumeric(hits.context.reader(), indexFieldName);
|
||||
DoubleValues scores = keepScores ? scores(hits) : null;
|
||||
|
@ -164,6 +169,11 @@ public class TaxonomyFacetFloatAssociations extends FloatTaxonomyFacets {
|
|||
throws IOException {
|
||||
|
||||
for (MatchingDocs hits : matchingDocs) {
|
||||
if (hits.totalHits == 0) {
|
||||
continue;
|
||||
}
|
||||
initializeValueCounters();
|
||||
|
||||
BinaryDocValues dv = DocValues.getBinary(hits.context.reader(), indexFieldName);
|
||||
DocIdSetIterator it =
|
||||
ConjunctionUtils.intersectIterators(Arrays.asList(hits.bits.iterator(), dv));
|
||||
|
|
|
@ -63,6 +63,11 @@ public class TaxonomyFacetIntAssociations extends IntTaxonomyFacets {
|
|||
AssociationAggregationFunction aggregationFunction, List<MatchingDocs> matchingDocs)
|
||||
throws IOException {
|
||||
for (MatchingDocs hits : matchingDocs) {
|
||||
if (hits.totalHits == 0) {
|
||||
continue;
|
||||
}
|
||||
initializeValueCounters();
|
||||
|
||||
BinaryDocValues dv = DocValues.getBinary(hits.context.reader(), indexFieldName);
|
||||
DocIdSetIterator it = ConjunctionUtils.intersectIterators(List.of(hits.bits.iterator(), dv));
|
||||
|
||||
|
|
|
@ -19,11 +19,13 @@ package org.apache.lucene.facet.taxonomy;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import org.apache.lucene.facet.FacetResult;
|
||||
import org.apache.lucene.facet.Facets;
|
||||
import org.apache.lucene.facet.FacetsCollector;
|
||||
import org.apache.lucene.facet.FacetsConfig;
|
||||
import org.apache.lucene.facet.FacetsConfig.DimConfig;
|
||||
|
||||
|
@ -53,6 +55,9 @@ abstract class TaxonomyFacets extends Facets {
|
|||
/** {@code FacetsConfig} provided to the constructor. */
|
||||
final FacetsConfig config;
|
||||
|
||||
/** {@code FacetsCollector} provided to the constructor. */
|
||||
final FacetsCollector fc;
|
||||
|
||||
/** Maps parent ordinal to its child, or -1 if the parent is childless. */
|
||||
private int[] children;
|
||||
|
||||
|
@ -63,11 +68,13 @@ abstract class TaxonomyFacets extends Facets {
|
|||
final int[] parents;
|
||||
|
||||
/** Sole constructor. */
|
||||
TaxonomyFacets(String indexFieldName, TaxonomyReader taxoReader, FacetsConfig config)
|
||||
TaxonomyFacets(
|
||||
String indexFieldName, TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc)
|
||||
throws IOException {
|
||||
this.indexFieldName = indexFieldName;
|
||||
this.taxoReader = taxoReader;
|
||||
this.config = config;
|
||||
this.fc = fc;
|
||||
parents = taxoReader.getParallelTaxonomyArrays().parents();
|
||||
}
|
||||
|
||||
|
@ -138,6 +145,11 @@ abstract class TaxonomyFacets extends Facets {
|
|||
@Override
|
||||
public List<FacetResult> getAllDims(int topN) throws IOException {
|
||||
validateTopN(topN);
|
||||
|
||||
if (hasValues() == false) {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
|
||||
int[] children = getChildren();
|
||||
int[] siblings = getSiblings();
|
||||
int ord = children[TaxonomyReader.ROOT_ORDINAL];
|
||||
|
@ -158,4 +170,7 @@ abstract class TaxonomyFacets extends Facets {
|
|||
results.sort(BY_VALUE_THEN_DIM);
|
||||
return results;
|
||||
}
|
||||
|
||||
/** Were any values actually aggregated during counting? */
|
||||
abstract boolean hasValues();
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue