LUCENE-10379: Count directly into the dense values array in FastTaxonomyFacetCounts#countAll (#605)

Co-authored-by: guofeng.my <guofeng.my@bytedance.com>
This commit is contained in:
Greg Miller 2022-01-13 09:17:55 -08:00 committed by GitHub
parent bd2cc4124d
commit 2f5e3c323b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 48 additions and 20 deletions

View File

@ -156,6 +156,9 @@ Optimizations
* LUCENE-10356: Further optimize facet counting for single-valued TaxonomyFacetCounts. (Greg Miller)
* LUCENE-10379: Count directly into the dense values array in FastTaxonomyFacetCounts#countAll.
(Guo Feng, Greg Miller)
Changes in runtime behavior
---------------------

View File

@ -84,13 +84,27 @@ public class FastTaxonomyFacetCounts extends IntTaxonomyFacets {
ConjunctionUtils.intersectIterators(Arrays.asList(hits.bits.iterator(), valuesIt));
if (singleValued != null) {
while (it.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
increment((int) singleValued.longValue());
if (values != null) {
while (it.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
values[(int) singleValued.longValue()]++;
}
} else {
while (it.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
sparseValues.addTo((int) singleValued.longValue(), 1);
}
}
} else {
while (it.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
for (int i = 0; i < multiValued.docValueCount(); i++) {
increment((int) multiValued.nextValue());
if (values != null) {
while (it.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
for (int i = 0; i < multiValued.docValueCount(); i++) {
values[(int) multiValued.nextValue()]++;
}
}
} else {
while (it.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
for (int i = 0; i < multiValued.docValueCount(); i++) {
sparseValues.addTo((int) multiValued.nextValue(), 1);
}
}
}
}
@ -100,6 +114,7 @@ public class FastTaxonomyFacetCounts extends IntTaxonomyFacets {
}
private void countAll(IndexReader reader) throws IOException {
assert values != null;
for (LeafReaderContext context : reader.leaves()) {
SortedNumericDocValues multiValued =
context.reader().getSortedNumericDocValues(indexFieldName);
@ -117,19 +132,18 @@ public class FastTaxonomyFacetCounts extends IntTaxonomyFacets {
if (liveDocs != null && liveDocs.get(doc) == false) {
continue;
}
increment((int) singleValued.longValue());
values[(int) singleValued.longValue()]++;
}
continue;
}
for (int doc = multiValued.nextDoc();
doc != DocIdSetIterator.NO_MORE_DOCS;
doc = multiValued.nextDoc()) {
if (liveDocs != null && liveDocs.get(doc) == false) {
continue;
}
for (int i = 0; i < multiValued.docValueCount(); i++) {
increment((int) multiValued.nextValue());
} else {
for (int doc = multiValued.nextDoc();
doc != DocIdSetIterator.NO_MORE_DOCS;
doc = multiValued.nextDoc()) {
if (liveDocs != null && liveDocs.get(doc) == false) {
continue;
}
for (int i = 0; i < multiValued.docValueCount(); i++) {
values[(int) multiValued.nextValue()]++;
}
}
}
}

View File

@ -31,10 +31,21 @@ import org.apache.lucene.facet.TopOrdAndIntQueue;
/** Base class for all taxonomy-based facets that aggregate to a per-ords int[]. */
public abstract class IntTaxonomyFacets extends TaxonomyFacets {
/** Per-ordinal value. */
private final int[] values;
/**
* Dense ordinal values.
*
* <p>This field and {@link #sparseValues} are protected to support expert usage. For example, a
* subclass can check which of the two is in use before a loop instead of calling
* {@link #increment} on each iteration.
*/
protected final int[] values;
private final IntIntHashMap sparseValues;
/**
* Sparse ordinal values.
*
* @see #values for why protected.
*/
protected final IntIntHashMap sparseValues;
/** Sole constructor. */
protected IntTaxonomyFacets(