From 179b45bc23e4496278b7058811577b66ef3af77d Mon Sep 17 00:00:00 2001 From: Greg Miller Date: Tue, 25 Jul 2023 12:20:42 -0700 Subject: [PATCH] Initialize facet counting data structures lazily (#12408) This change covers: * Taxonomy faceting * FastTaxonomyFacetCounts * TaxonomyFacetIntAssociations * TaxonomyFacetFloatAssociations * SSDV faceting * SortedSetDocValuesFacetCounts * ConcurrentSortedSetDocValuesFacetCounts * StringValueFacetCounts * Range faceting: * LongRangeFacetCounts * DoubleRangeFacetCounts * Long faceting: * LongValueFacetCounts Left for a future iteration: * RangeOnRange faceting * FacetSet faceting --- lucene/CHANGES.txt | 3 + .../lucene/facet/LongValueFacetCounts.java | 84 +++++++++++++++---- .../lucene/facet/StringValueFacetCounts.java | 48 ++++++++--- .../facet/range/DoubleRangeFacetCounts.java | 44 ++++++---- .../facet/range/LongRangeFacetCounts.java | 42 +++++++--- .../lucene/facet/range/RangeFacetCounts.java | 46 +++++++--- .../AbstractSortedSetDocValueFacetCounts.java | 22 ++++- ...ncurrentSortedSetDocValuesFacetCounts.java | 12 +++ .../SortedSetDocValuesFacetCounts.java | 26 +++++- .../taxonomy/FastTaxonomyFacetCounts.java | 7 +- .../facet/taxonomy/FloatTaxonomyFacets.java | 39 +++++++-- .../facet/taxonomy/IntTaxonomyFacets.java | 42 ++++++++-- .../TaxonomyFacetFloatAssociations.java | 14 +++- .../TaxonomyFacetIntAssociations.java | 5 ++ .../lucene/facet/taxonomy/TaxonomyFacets.java | 17 +++- 15 files changed, 366 insertions(+), 85 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index bc0efa48906..560b7117125 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -161,6 +161,9 @@ Optimizations * GITHUB#12453: Faster bulk numeric reads from BufferedIndexInput (Armin Braun) +* GITHUB#12408: Lazy initialization improvements for Facets implementations when there are segments with no hits + to count. 
(Greg Miller) + Bug Fixes --------------------- diff --git a/lucene/facet/src/java/org/apache/lucene/facet/LongValueFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/LongValueFacetCounts.java index 0deffe7b90e..d39b0e847a7 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/LongValueFacetCounts.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/LongValueFacetCounts.java @@ -51,10 +51,13 @@ import org.apache.lucene.util.PriorityQueue; public class LongValueFacetCounts extends Facets { /** Used for all values that are < 1K. */ - private final int[] counts = new int[1024]; + private int[] counts; /** Used for all values that are >= 1K. */ - private final LongIntHashMap hashCounts = new LongIntHashMap(); + private LongIntHashMap hashCounts; + + /** Whether-or-not counters have been initialized. */ + private boolean initialized; /** Field being counted. */ private final String field; @@ -125,6 +128,7 @@ public class LongValueFacetCounts extends Facets { public LongValueFacetCounts(String field, LongValuesSource valueSource, IndexReader reader) throws IOException { this.field = field; + initializeCounters(); if (valueSource != null) { countAll(reader, valueSource); } else { @@ -141,6 +145,7 @@ public class LongValueFacetCounts extends Facets { public LongValueFacetCounts(String field, MultiLongValuesSource valuesSource, IndexReader reader) throws IOException { this.field = field; + initializeCounters(); if (valuesSource != null) { LongValuesSource singleValued = MultiLongValuesSource.unwrapSingleton(valuesSource); if (singleValued != null) { @@ -153,11 +158,25 @@ public class LongValueFacetCounts extends Facets { } } + private void initializeCounters() { + if (initialized) { + return; + } + assert counts == null && hashCounts == null; + initialized = true; + counts = new int[1024]; + hashCounts = new LongIntHashMap(); + } + /** Counts from the provided valueSource. 
*/ private void count(LongValuesSource valueSource, List matchingDocs) throws IOException { for (MatchingDocs hits : matchingDocs) { + if (hits.totalHits == 0) { + continue; + } + initializeCounters(); LongValues fv = valueSource.getValues(hits.context, null); @@ -183,6 +202,10 @@ public class LongValueFacetCounts extends Facets { private void count(MultiLongValuesSource valuesSource, List matchingDocs) throws IOException { for (MatchingDocs hits : matchingDocs) { + if (hits.totalHits == 0) { + continue; + } + initializeCounters(); MultiLongValues multiValues = valuesSource.getValues(hits.context); @@ -213,6 +236,10 @@ public class LongValueFacetCounts extends Facets { /** Counts from the field's indexed doc values. */ private void count(String field, List matchingDocs) throws IOException { for (MatchingDocs hits : matchingDocs) { + if (hits.totalHits == 0) { + continue; + } + initializeCounters(); SortedNumericDocValues multiValues = DocValues.getSortedNumeric(hits.context.reader(), field); NumericDocValues singleValues = DocValues.unwrapSingleton(multiValues); @@ -350,6 +377,13 @@ public class LongValueFacetCounts extends Facets { @Override public FacetResult getAllChildren(String dim, String... 
path) throws IOException { validateDimAndPathForGetChildren(dim, path); + + if (initialized == false) { + // nothing was counted (either no hits or no values for all hits): + assert totCount == 0; + return new FacetResult(field, new String[0], totCount, new LabelAndValue[0], 0); + } + List labelValues = new ArrayList<>(); for (int i = 0; i < counts.length; i++) { if (counts[i] != 0) { @@ -378,6 +412,12 @@ public class LongValueFacetCounts extends Facets { validateTopN(topN); validateDimAndPathForGetChildren(dim, path); + if (initialized == false) { + // nothing was counted (either no hits or no values for all hits): + assert totCount == 0; + return new FacetResult(field, new String[0], totCount, new LabelAndValue[0], 0); + } + PriorityQueue pq = new PriorityQueue<>(Math.min(topN, counts.length + hashCounts.size())) { @Override @@ -440,6 +480,12 @@ public class LongValueFacetCounts extends Facets { * efficient to use {@link #getAllChildren(String, String...)}. */ public FacetResult getAllChildrenSortByValue() { + if (initialized == false) { + // nothing was counted (either no hits or no values for all hits): + assert totCount == 0; + return new FacetResult(field, new String[0], totCount, new LabelAndValue[0], 0); + } + List labelValues = new ArrayList<>(); // compact & sort hash table's arrays by value @@ -533,27 +579,29 @@ public class LongValueFacetCounts extends Facets { StringBuilder b = new StringBuilder(); b.append("LongValueFacetCounts totCount="); b.append(totCount); - b.append(":\n"); - for (int i = 0; i < counts.length; i++) { - if (counts[i] != 0) { - b.append(" "); - b.append(i); - b.append(" -> count="); - b.append(counts[i]); - b.append('\n'); - } - } - - if (hashCounts.size() != 0) { - for (LongIntCursor c : hashCounts) { - if (c.value != 0) { + if (initialized) { + b.append(":\n"); + for (int i = 0; i < counts.length; i++) { + if (counts[i] != 0) { b.append(" "); - b.append(c.key); + b.append(i); b.append(" -> count="); - b.append(c.value); + 
b.append(counts[i]); b.append('\n'); } } + + if (hashCounts.size() != 0) { + for (LongIntCursor c : hashCounts) { + if (c.value != 0) { + b.append(" "); + b.append(c.key); + b.append(" -> count="); + b.append(c.value); + b.append('\n'); + } + } + } } return b.toString(); diff --git a/lucene/facet/src/java/org/apache/lucene/facet/StringValueFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/StringValueFacetCounts.java index 957efec0747..335f93d56d3 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/StringValueFacetCounts.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/StringValueFacetCounts.java @@ -69,8 +69,9 @@ public class StringValueFacetCounts extends Facets { private final OrdinalMap ordinalMap; private final SortedSetDocValues docValues; - private final int[] denseCounts; + private int[] denseCounts; private final IntIntHashMap sparseCounts; + private boolean initialized; private final int cardinality; private int totalDocCount; @@ -101,7 +102,9 @@ public class StringValueFacetCounts extends Facets { if (facetsCollector != null) { if (cardinality < 1024) { // count densely for low cardinality sparseCounts = null; - denseCounts = new int[cardinality]; + denseCounts = null; + initialized = false; + count(facetsCollector); } else { int totalHits = 0; int totalDocs = 0; @@ -110,22 +113,31 @@ public class StringValueFacetCounts extends Facets { totalDocs += matchingDocs.context.reader().maxDoc(); } - // If our result set is < 10% of the index, we collect sparsely (use hash map). This - // heuristic is borrowed from IntTaxonomyFacetCounts: - if (totalHits < totalDocs / 10) { - sparseCounts = new IntIntHashMap(); - denseCounts = null; - } else { + // No counting needed if there are no hits: + if (totalHits == 0) { sparseCounts = null; - denseCounts = new int[cardinality]; + denseCounts = null; + initialized = true; + } else { + // If our result set is < 10% of the index, we collect sparsely (use hash map). 
This + // heuristic is borrowed from IntTaxonomyFacetCounts: + if (totalHits < totalDocs / 10) { + sparseCounts = new IntIntHashMap(); + denseCounts = null; + initialized = true; + } else { + sparseCounts = null; + denseCounts = new int[cardinality]; + initialized = true; + } + count(facetsCollector); } } - - count(facetsCollector); } else { // Since we're counting all ordinals, count densely: sparseCounts = null; denseCounts = new int[cardinality]; + initialized = true; countAll(); } @@ -294,6 +306,9 @@ public class StringValueFacetCounts extends Facets { if (matchingDocs.size() == 1) { FacetsCollector.MatchingDocs hits = matchingDocs.get(0); + if (hits.totalHits == 0) { + return; + } // Validate state before doing anything else: validateState(hits.context); @@ -314,6 +329,10 @@ public class StringValueFacetCounts extends Facets { assert ordinalMap != null; assert docValues instanceof MultiDocValues.MultiSortedSetDocValues; + if (hits.totalHits == 0) { + continue; + } + MultiDocValues.MultiSortedSetDocValues multiValues = (MultiDocValues.MultiSortedSetDocValues) docValues; @@ -368,6 +387,13 @@ public class StringValueFacetCounts extends Facets { FacetsCollector.MatchingDocs hits, Bits liveDocs) throws IOException { + if (initialized == false) { + assert denseCounts == null && sparseCounts == null; + // If the counters weren't initialized, we can assume the cardinality is low enough that + // dense counting will be preferable: + denseCounts = new int[cardinality]; + initialized = true; + } // It's slightly more efficient to work against SortedDocValues if the field is actually // single-valued (see: LUCENE-5309) diff --git a/lucene/facet/src/java/org/apache/lucene/facet/range/DoubleRangeFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/range/DoubleRangeFacetCounts.java index ae5ac429535..701d2471bb4 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/range/DoubleRangeFacetCounts.java +++
b/lucene/facet/src/java/org/apache/lucene/facet/range/DoubleRangeFacetCounts.java @@ -157,20 +157,25 @@ public class DoubleRangeFacetCounts extends RangeFacetCounts { private void count(DoubleValuesSource valueSource, List matchingDocs) throws IOException { - LongRange[] longRanges = getLongRanges(); - - LongRangeCounter counter = LongRangeCounter.create(longRanges, counts); - + LongRangeCounter counter = null; int missingCount = 0; for (MatchingDocs hits : matchingDocs) { - DoubleValues fv = valueSource.getValues(hits.context, null); - totCount += hits.totalHits; + if (hits.totalHits == 0) { + continue; + } final DocIdSetIterator it = createIterator(hits); if (it == null) { continue; } + if (counter == null) { + counter = setupCounter(); + } + + DoubleValues fv = valueSource.getValues(hits.context, null); + totCount += hits.totalHits; + for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; ) { // Skip missing docs: if (fv.advanceExact(doc)) { @@ -183,27 +188,34 @@ public class DoubleRangeFacetCounts extends RangeFacetCounts { } } - missingCount += counter.finish(); - totCount -= missingCount; + if (counter != null) { + missingCount += counter.finish(); + totCount -= missingCount; + } } /** Counts from the provided valueSource. 
*/ private void count(MultiDoubleValuesSource valueSource, List matchingDocs) throws IOException { - LongRange[] longRanges = getLongRanges(); - - LongRangeCounter counter = LongRangeCounter.create(longRanges, counts); - + LongRangeCounter counter = null; // LongRangeCounter.create(longRanges, counts); int missingCount = 0; for (MatchingDocs hits : matchingDocs) { - MultiDoubleValues multiValues = valueSource.getValues(hits.context); + if (hits.totalHits == 0) { + continue; + } final DocIdSetIterator it = createIterator(hits); if (it == null) { continue; } + if (counter == null) { + counter = setupCounter(); + } + + MultiDoubleValues multiValues = valueSource.getValues(hits.context); + for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; ) { // Skip missing docs: if (multiValues.advanceExact(doc)) { @@ -232,8 +244,10 @@ public class DoubleRangeFacetCounts extends RangeFacetCounts { } } - missingCount += counter.finish(); - totCount -= missingCount; + if (counter != null) { + missingCount += counter.finish(); + totCount -= missingCount; + } } /** Create long ranges from the double ranges. 
*/ diff --git a/lucene/facet/src/java/org/apache/lucene/facet/range/LongRangeFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/range/LongRangeFacetCounts.java index 42a2e4c3bc2..34aa3fcd062 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/range/LongRangeFacetCounts.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/range/LongRangeFacetCounts.java @@ -128,21 +128,27 @@ public class LongRangeFacetCounts extends RangeFacetCounts { private void count(LongValuesSource valueSource, List matchingDocs) throws IOException { - LongRange[] ranges = getLongRanges(); - - LongRangeCounter counter = LongRangeCounter.create(ranges, counts); + LongRangeCounter counter = null; int missingCount = 0; for (MatchingDocs hits : matchingDocs) { - LongValues fv = valueSource.getValues(hits.context, null); - totCount += hits.totalHits; + if (hits.totalHits == 0) { + continue; + } final DocIdSetIterator it = createIterator(hits); if (it == null) { continue; } + if (counter == null) { + counter = setupCounter(); + } + + LongValues fv = valueSource.getValues(hits.context, null); + totCount += hits.totalHits; + for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; ) { // Skip missing docs: if (fv.advanceExact(doc)) { @@ -155,26 +161,34 @@ public class LongRangeFacetCounts extends RangeFacetCounts { } } - missingCount += counter.finish(); - totCount -= missingCount; + if (counter != null) { + missingCount += counter.finish(); + totCount -= missingCount; + } } /** Counts from the provided valueSource. 
*/ private void count(MultiLongValuesSource valueSource, List matchingDocs) throws IOException { - LongRange[] ranges = getLongRanges(); - - LongRangeCounter counter = LongRangeCounter.create(ranges, counts); + LongRangeCounter counter = null; for (MatchingDocs hits : matchingDocs) { - MultiLongValues multiValues = valueSource.getValues(hits.context); + if (hits.totalHits == 0) { + continue; + } final DocIdSetIterator it = createIterator(hits); if (it == null) { continue; } + if (counter == null) { + counter = setupCounter(); + } + + MultiLongValues multiValues = valueSource.getValues(hits.context); + for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; ) { // Skip missing docs: if (multiValues.advanceExact(doc)) { @@ -203,8 +217,10 @@ public class LongRangeFacetCounts extends RangeFacetCounts { } } - int missingCount = counter.finish(); - totCount -= missingCount; + if (counter != null) { + int missingCount = counter.finish(); + totCount -= missingCount; + } } @Override diff --git a/lucene/facet/src/java/org/apache/lucene/facet/range/RangeFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/range/RangeFacetCounts.java index 7cff801e30a..8bf9c352e34 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/range/RangeFacetCounts.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/range/RangeFacetCounts.java @@ -39,8 +39,8 @@ abstract class RangeFacetCounts extends FacetCountsWithFilterQuery { /** Ranges passed to constructor. */ protected final Range[] ranges; - /** Counts, initialized in by subclass. */ - protected final int[] counts; + /** Counts. */ + protected int[] counts; /** Our field name. 
*/ protected final String field; @@ -53,7 +53,6 @@ abstract class RangeFacetCounts extends FacetCountsWithFilterQuery { super(fastMatchQuery); this.field = field; this.ranges = ranges; - counts = new int[ranges.length]; } protected abstract LongRange[] getLongRanges(); @@ -62,6 +61,12 @@ abstract class RangeFacetCounts extends FacetCountsWithFilterQuery { return l; } + protected LongRangeCounter setupCounter() { + assert counts == null; + counts = new int[ranges.length]; + return LongRangeCounter.create(getLongRanges(), counts); + } + /** Counts from the provided field. */ protected void count(String field, List matchingDocs) throws IOException { @@ -69,15 +74,20 @@ abstract class RangeFacetCounts extends FacetCountsWithFilterQuery { // load doc values for all segments up front and keep track of whether-or-not we found any that // were actually multi-valued. this allows us to optimize the case where all segments contain // single-values. - SortedNumericDocValues[] multiValuedDocVals = new SortedNumericDocValues[matchingDocs.size()]; + SortedNumericDocValues[] multiValuedDocVals = null; NumericDocValues[] singleValuedDocVals = null; boolean foundMultiValued = false; for (int i = 0; i < matchingDocs.size(); i++) { - FacetsCollector.MatchingDocs hits = matchingDocs.get(i); + if (hits.totalHits == 0) { + continue; + } SortedNumericDocValues multiValues = DocValues.getSortedNumeric(hits.context.reader(), field); + if (multiValuedDocVals == null) { + multiValuedDocVals = new SortedNumericDocValues[matchingDocs.size()]; + } multiValuedDocVals[i] = multiValues; // only bother trying to unwrap a singleton if we haven't yet seen any true multi-valued cases @@ -94,6 +104,11 @@ abstract class RangeFacetCounts extends FacetCountsWithFilterQuery { } } + if (multiValuedDocVals == null) { + // no hits or no doc values in all segments. 
nothing to count: + return; + } + // we only need to keep around one or the other at this point if (foundMultiValued) { singleValuedDocVals = null; @@ -101,7 +116,7 @@ abstract class RangeFacetCounts extends FacetCountsWithFilterQuery { multiValuedDocVals = null; } - LongRangeCounter counter = LongRangeCounter.create(getLongRanges(), counts); + LongRangeCounter counter = setupCounter(); int missingCount = 0; @@ -183,9 +198,15 @@ abstract class RangeFacetCounts extends FacetCountsWithFilterQuery { @Override public FacetResult getAllChildren(String dim, String... path) throws IOException { validateDimAndPathForGetChildren(dim, path); - LabelAndValue[] labelValues = new LabelAndValue[counts.length]; - for (int i = 0; i < counts.length; i++) { - labelValues[i] = new LabelAndValue(ranges[i].label, counts[i]); + LabelAndValue[] labelValues = new LabelAndValue[ranges.length]; + if (counts == null) { + for (int i = 0; i < ranges.length; i++) { + labelValues[i] = new LabelAndValue(ranges[i].label, 0); + } + } else { + for (int i = 0; i < ranges.length; i++) { + labelValues[i] = new LabelAndValue(ranges[i].label, counts[i]); + } } return new FacetResult(dim, path, totCount, labelValues, labelValues.length); } @@ -195,6 +216,11 @@ abstract class RangeFacetCounts extends FacetCountsWithFilterQuery { validateTopN(topN); validateDimAndPathForGetChildren(dim, path); + if (counts == null) { + assert totCount == 0; + return new FacetResult(dim, path, totCount, new LabelAndValue[0], 0); + } + PriorityQueue pq = new PriorityQueue<>(Math.min(topN, counts.length)) { @Override @@ -251,7 +277,7 @@ abstract class RangeFacetCounts extends FacetCountsWithFilterQuery { b.append(" "); b.append(ranges[i].label); b.append(" -> count="); - b.append(counts[i]); + b.append(counts != null ? 
counts[i] : 0); b.append('\n'); } return b.toString(); diff --git a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/AbstractSortedSetDocValueFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/AbstractSortedSetDocValueFacetCounts.java index ad3d412c97e..962530a21cd 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/AbstractSortedSetDocValueFacetCounts.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/AbstractSortedSetDocValueFacetCounts.java @@ -19,6 +19,7 @@ package org.apache.lucene.facet.sortedset; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.List; @@ -68,6 +69,9 @@ abstract class AbstractSortedSetDocValueFacetCounts extends Facets { @Override public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException { validateTopN(topN); + if (hasCounts() == false) { + return null; + } TopChildrenForPath topChildrenForPath = getTopChildrenForPath(topN, dim, path); return createFacetResult(topChildrenForPath, dim, path); } @@ -80,6 +84,10 @@ abstract class AbstractSortedSetDocValueFacetCounts extends Facets { return null; } + if (hasCounts() == false) { + return null; + } + // Compute the actual results: int pathCount = 0; List labelValues = new ArrayList<>(); @@ -111,12 +119,17 @@ abstract class AbstractSortedSetDocValueFacetCounts extends Facets { return -1; } - return getCount(ord); + return hasCounts() == false ? 
0 : getCount(ord); } @Override public List getAllDims(int topN) throws IOException { validateTopN(topN); + + if (hasCounts() == false) { + return Collections.emptyList(); + } + List results = new ArrayList<>(); for (String dim : state.getDims()) { TopChildrenForPath topChildrenForPath = getTopChildrenForPath(topN, dim); @@ -136,6 +149,10 @@ abstract class AbstractSortedSetDocValueFacetCounts extends Facets { validateTopN(topNDims); validateTopN(topNChildren); + if (hasCounts() == false) { + return Collections.emptyList(); + } + // Creates priority queue to store top dimensions and sort by their aggregated values/hits and // string values. PriorityQueue pq = @@ -230,6 +247,9 @@ abstract class AbstractSortedSetDocValueFacetCounts extends Facets { return Arrays.asList(results); } + /** Were any counts actually computed? (They may not be if there are no hits, etc.) */ + abstract boolean hasCounts(); + /** Retrieve the count for a specified ordinal. */ abstract int getCount(int ord); diff --git a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/ConcurrentSortedSetDocValuesFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/ConcurrentSortedSetDocValuesFacetCounts.java index 221836cea05..0b03da9135b 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/ConcurrentSortedSetDocValuesFacetCounts.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/ConcurrentSortedSetDocValuesFacetCounts.java @@ -77,6 +77,13 @@ public class ConcurrentSortedSetDocValuesFacetCounts extends AbstractSortedSetDo } } + @Override + boolean hasCounts() { + // TODO: safe to always assume there are counts, but maybe it would be more optimal to + // actually track if we see a count? 
+ return true; + } + @Override int getCount(int ord) { return counts.get(ord); @@ -99,6 +106,11 @@ public class ConcurrentSortedSetDocValuesFacetCounts extends AbstractSortedSetDo @Override public Void call() throws IOException { + // If we're counting collected hits but there were none, short-circuit: + if (hits != null && hits.totalHits == 0) { + return null; + } + SortedSetDocValues multiValues = DocValues.getSortedSet(leafReader, field); if (multiValues == null) { // nothing to count here diff --git a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesFacetCounts.java index f167bf3b68b..6ecc08da9d7 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesFacetCounts.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesFacetCounts.java @@ -56,7 +56,8 @@ import org.apache.lucene.util.LongValues; * @lucene.experimental */ public class SortedSetDocValuesFacetCounts extends AbstractSortedSetDocValueFacetCounts { - final int[] counts; + private final SortedSetDocValuesReaderState state; + int[] counts; /** Returns all facet counts, same result as searching on {@link MatchAllDocsQuery} but faster. 
*/ public SortedSetDocValuesFacetCounts(SortedSetDocValuesReaderState state) throws IOException { @@ -67,7 +68,7 @@ public class SortedSetDocValuesFacetCounts extends AbstractSortedSetDocValueFace public SortedSetDocValuesFacetCounts(SortedSetDocValuesReaderState state, FacetsCollector hits) throws IOException { super(state); - this.counts = new int[state.getSize()]; + this.state = state; if (hits == null) { // browse only countAll(); @@ -76,6 +77,17 @@ public class SortedSetDocValuesFacetCounts extends AbstractSortedSetDocValueFace } } + private void initializeCounts() { + if (counts == null) { + counts = new int[state.getSize()]; + } + } + + @Override + boolean hasCounts() { + return counts != null; + } + @Override int getCount(int ord) { return counts[ord]; @@ -90,6 +102,9 @@ public class SortedSetDocValuesFacetCounts extends AbstractSortedSetDocValueFace return; } + // Initialize counts: + initializeCounts(); + // It's slightly more efficient to work against SortedDocValues if the field is actually // single-valued (see: LUCENE-5309) SortedDocValues singleValues = DocValues.unwrapSingleton(multiValues); @@ -159,12 +174,19 @@ public class SortedSetDocValuesFacetCounts extends AbstractSortedSetDocValueFace private void countOneSegment( OrdinalMap ordinalMap, LeafReader reader, int segOrd, MatchingDocs hits, Bits liveDocs) throws IOException { + if (hits != null && hits.totalHits == 0) { + return; + } + SortedSetDocValues multiValues = DocValues.getSortedSet(reader, field); if (multiValues == null) { // nothing to count return; } + // Initialize counts: + initializeCounts(); + // It's slightly more efficient to work against SortedDocValues if the field is actually // single-valued (see: LUCENE-5309) SortedDocValues singleValues = DocValues.unwrapSingleton(multiValues); diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FastTaxonomyFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FastTaxonomyFacetCounts.java index 
82c3b2a3af7..a68f7725db5 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FastTaxonomyFacetCounts.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FastTaxonomyFacetCounts.java @@ -71,11 +71,15 @@ public class FastTaxonomyFacetCounts extends IntTaxonomyFacets { private void count(List matchingDocs) throws IOException { for (MatchingDocs hits : matchingDocs) { + if (hits.totalHits == 0) { + continue; + } SortedNumericDocValues multiValued = hits.context.reader().getSortedNumericDocValues(indexFieldName); if (multiValued == null) { continue; } + initializeValueCounters(); NumericDocValues singleValued = DocValues.unwrapSingleton(multiValued); @@ -114,13 +118,14 @@ public class FastTaxonomyFacetCounts extends IntTaxonomyFacets { } private void countAll(IndexReader reader) throws IOException { - assert values != null; for (LeafReaderContext context : reader.leaves()) { SortedNumericDocValues multiValued = context.reader().getSortedNumericDocValues(indexFieldName); if (multiValued == null) { continue; } + initializeValueCounters(); + assert values != null; Bits liveDocs = context.reader().getLiveDocs(); diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FloatTaxonomyFacets.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FloatTaxonomyFacets.java index 35c71a4c661..fcf74c22141 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FloatTaxonomyFacets.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FloatTaxonomyFacets.java @@ -20,10 +20,12 @@ import com.carrotsearch.hppc.FloatArrayList; import com.carrotsearch.hppc.IntArrayList; import java.io.IOException; import java.util.Arrays; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import org.apache.lucene.facet.FacetResult; +import org.apache.lucene.facet.FacetsCollector; import org.apache.lucene.facet.FacetsConfig; import org.apache.lucene.facet.FacetsConfig.DimConfig; import 
org.apache.lucene.facet.LabelAndValue; @@ -39,22 +41,37 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets { final AssociationAggregationFunction aggregationFunction; /** Per-ordinal value. */ - final float[] values; + float[] values; /** Sole constructor. */ FloatTaxonomyFacets( String indexFieldName, TaxonomyReader taxoReader, AssociationAggregationFunction aggregationFunction, - FacetsConfig config) + FacetsConfig config, + FacetsCollector fc) throws IOException { - super(indexFieldName, taxoReader, config); + super(indexFieldName, taxoReader, config, fc); this.aggregationFunction = aggregationFunction; - values = new float[taxoReader.getSize()]; + } + + @Override + boolean hasValues() { + return values != null; + } + + void initializeValueCounters() { + if (values == null) { + values = new float[taxoReader.getSize()]; + } } /** Rolls up any single-valued hierarchical dimensions. */ void rollup() throws IOException { + if (values == null) { + return; + } + // Rollup any necessary dims: int[] children = getChildren(); for (Map.Entry ent : config.getDimConfigs().entrySet()) { @@ -100,7 +117,7 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets { if (ord < 0) { return -1; } - return values[ord]; + return values == null ? 
0 : values[ord]; } @Override @@ -112,6 +129,10 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets { return null; } + if (values == null) { + return null; + } + int[] children = getChildren(); int[] siblings = getSiblings(); @@ -166,6 +187,10 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets { return null; } + if (values == null) { + return null; + } + TopChildrenForPath topChildrenForPath = getTopChildrenForPath(dimConfig, dimOrd, topN); return createFacetResult(topChildrenForPath, dim, path); } @@ -264,6 +289,10 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets { validateTopN(topNDims); validateTopN(topNChildren); + if (values == null) { + return Collections.emptyList(); + } + // get existing children and siblings ordinal array from TaxonomyFacets int[] children = getChildren(); int[] siblings = getSiblings(); diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/IntTaxonomyFacets.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/IntTaxonomyFacets.java index 9a8b94b573b..3a26d83000b 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/IntTaxonomyFacets.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/IntTaxonomyFacets.java @@ -21,6 +21,7 @@ import com.carrotsearch.hppc.IntIntHashMap; import com.carrotsearch.hppc.cursors.IntIntCursor; import java.io.IOException; import java.util.Arrays; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -40,10 +41,13 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets { final AssociationAggregationFunction aggregationFunction; /** Dense ordinal values. */ - final int[] values; + int[] values; /** Sparse ordinal values. */ - final IntIntHashMap sparseValues; + IntIntHashMap sparseValues; + + /** Have value counters been initialized. */ + boolean initialized; /** Sole constructor. 
*/ IntTaxonomyFacets( @@ -53,14 +57,24 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets { AssociationAggregationFunction aggregationFunction, FacetsCollector fc) throws IOException { - super(indexFieldName, taxoReader, config); + super(indexFieldName, taxoReader, config, fc); this.aggregationFunction = aggregationFunction; + } + @Override + boolean hasValues() { + return initialized; + } + + void initializeValueCounters() { + if (initialized) { + return; + } + initialized = true; + assert sparseValues == null && values == null; if (useHashTable(fc, taxoReader)) { sparseValues = new IntIntHashMap(); - values = null; } else { - sparseValues = null; values = new int[taxoReader.getSize()]; } } @@ -85,6 +99,10 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets { /** Rolls up any single-valued hierarchical dimensions. */ void rollup() throws IOException { + if (initialized == false) { + return; + } + // Rollup any necessary dims: int[] children = null; for (Map.Entry ent : config.getDimConfigs().entrySet()) { @@ -161,7 +179,7 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets { if (ord < 0) { return -1; } - return getValue(ord); + return initialized ? 
getValue(ord) : 0; } @Override @@ -173,6 +191,10 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets { return null; } + if (initialized == false) { + return null; + } + int aggregatedValue = 0; IntArrayList ordinals = new IntArrayList(); @@ -239,6 +261,10 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets { return null; } + if (initialized == false) { + return null; + } + TopChildrenForPath topChildrenForPath = getTopChildrenForPath(dimConfig, dimOrd, topN); return createFacetResult(topChildrenForPath, dim, path); } @@ -324,6 +350,10 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets { throw new IllegalArgumentException("topN must be > 0"); } + if (initialized == false) { + return Collections.emptyList(); + } + // get children and siblings ordinal array from TaxonomyFacets int[] children = getChildren(); int[] siblings = getSiblings(); diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacetFloatAssociations.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacetFloatAssociations.java index 1de29ca8a22..4903cc30040 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacetFloatAssociations.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacetFloatAssociations.java @@ -88,7 +88,7 @@ public class TaxonomyFacetFloatAssociations extends FloatTaxonomyFacets { FacetsCollector fc, AssociationAggregationFunction aggregationFunction) throws IOException { - super(indexFieldName, taxoReader, aggregationFunction, config); + super(indexFieldName, taxoReader, aggregationFunction, config, fc); aggregateValues(aggregationFunction, fc.getMatchingDocs()); } @@ -104,7 +104,7 @@ public class TaxonomyFacetFloatAssociations extends FloatTaxonomyFacets { AssociationAggregationFunction aggregationFunction, DoubleValuesSource valuesSource) throws IOException { - super(indexFieldName, taxoReader, aggregationFunction, config); + super(indexFieldName, taxoReader, 
aggregationFunction, config, fc); aggregateValues(aggregationFunction, fc.getMatchingDocs(), fc.getKeepScores(), valuesSource); } @@ -134,6 +134,11 @@ public class TaxonomyFacetFloatAssociations extends FloatTaxonomyFacets { DoubleValuesSource valueSource) throws IOException { for (MatchingDocs hits : matchingDocs) { + if (hits.totalHits == 0) { + continue; + } + initializeValueCounters(); + SortedNumericDocValues ordinalValues = DocValues.getSortedNumeric(hits.context.reader(), indexFieldName); DoubleValues scores = keepScores ? scores(hits) : null; @@ -164,6 +169,11 @@ public class TaxonomyFacetFloatAssociations extends FloatTaxonomyFacets { throws IOException { for (MatchingDocs hits : matchingDocs) { + if (hits.totalHits == 0) { + continue; + } + initializeValueCounters(); + BinaryDocValues dv = DocValues.getBinary(hits.context.reader(), indexFieldName); DocIdSetIterator it = ConjunctionUtils.intersectIterators(Arrays.asList(hits.bits.iterator(), dv)); diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacetIntAssociations.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacetIntAssociations.java index 09638bc6f30..f437efa0d8a 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacetIntAssociations.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacetIntAssociations.java @@ -63,6 +63,11 @@ public class TaxonomyFacetIntAssociations extends IntTaxonomyFacets { AssociationAggregationFunction aggregationFunction, List matchingDocs) throws IOException { for (MatchingDocs hits : matchingDocs) { + if (hits.totalHits == 0) { + continue; + } + initializeValueCounters(); + BinaryDocValues dv = DocValues.getBinary(hits.context.reader(), indexFieldName); DocIdSetIterator it = ConjunctionUtils.intersectIterators(List.of(hits.bits.iterator(), dv)); diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacets.java 
b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacets.java index ee5abb0abd3..97d3179e11e 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacets.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacets.java @@ -19,11 +19,13 @@ package org.apache.lucene.facet.taxonomy; import java.io.IOException; import java.util.ArrayList; +import java.util.Collections; import java.util.Comparator; import java.util.List; import java.util.Locale; import org.apache.lucene.facet.FacetResult; import org.apache.lucene.facet.Facets; +import org.apache.lucene.facet.FacetsCollector; import org.apache.lucene.facet.FacetsConfig; import org.apache.lucene.facet.FacetsConfig.DimConfig; @@ -53,6 +55,9 @@ abstract class TaxonomyFacets extends Facets { /** {@code FacetsConfig} provided to the constructor. */ final FacetsConfig config; + /** {@code FacetsCollector} provided to the constructor. */ + final FacetsCollector fc; + /** Maps parent ordinal to its child, or -1 if the parent is childless. */ private int[] children; @@ -63,11 +68,13 @@ abstract class TaxonomyFacets extends Facets { final int[] parents; /** Sole constructor. 
*/ - TaxonomyFacets(String indexFieldName, TaxonomyReader taxoReader, FacetsConfig config) + TaxonomyFacets( + String indexFieldName, TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc) throws IOException { this.indexFieldName = indexFieldName; this.taxoReader = taxoReader; this.config = config; + this.fc = fc; parents = taxoReader.getParallelTaxonomyArrays().parents(); } @@ -138,6 +145,11 @@ abstract class TaxonomyFacets extends Facets { @Override public List getAllDims(int topN) throws IOException { validateTopN(topN); + + if (hasValues() == false) { + return Collections.emptyList(); + } + int[] children = getChildren(); int[] siblings = getSiblings(); int ord = children[TaxonomyReader.ROOT_ORDINAL]; @@ -158,4 +170,7 @@ abstract class TaxonomyFacets extends Facets { results.sort(BY_VALUE_THEN_DIM); return results; } + + /** Were any values actually aggregated during counting? */ + abstract boolean hasValues(); }