LUCENE-10356: Further optimize facet counting for single-valued TaxonomyFacetCounts (#585)

This commit is contained in:
Greg Miller 2022-01-10 10:23:46 -08:00 committed by GitHub
parent eb0b1bf9f1
commit cf12b46092
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 48 additions and 25 deletions

View File

@@ -150,6 +150,8 @@ Optimizations
* LUCENE-10350: Avoid some duplicate null check in facet counting for TaxonomyFacetCounts. (Guo Feng)
* LUCENE-10356: Further optimize facet counting for single-valued TaxonomyFacetCounts. (Greg Miller)
Changes in runtime behavior
---------------------

View File

@@ -71,24 +71,40 @@ public class FastTaxonomyFacetCounts extends IntTaxonomyFacets {
private void count(List<MatchingDocs> matchingDocs) throws IOException {
for (MatchingDocs hits : matchingDocs) {
SortedNumericDocValues dv = hits.context.reader().getSortedNumericDocValues(indexFieldName);
if (dv == null) {
SortedNumericDocValues multiValued =
hits.context.reader().getSortedNumericDocValues(indexFieldName);
if (multiValued == null) {
continue;
}
DocIdSetIterator it =
ConjunctionUtils.intersectIterators(Arrays.asList(hits.bits.iterator(), dv));
NumericDocValues singleValued = DocValues.unwrapSingleton(multiValued);
if (values != null) {
while (it.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
for (int i = 0; i < dv.docValueCount(); i++) {
values[(int) dv.nextValue()]++;
DocIdSetIterator valuesIt = singleValued != null ? singleValued : multiValued;
DocIdSetIterator it =
ConjunctionUtils.intersectIterators(Arrays.asList(hits.bits.iterator(), valuesIt));
if (singleValued != null) {
if (values != null) {
while (it.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
values[(int) singleValued.longValue()]++;
}
} else {
while (it.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
sparseValues.addTo((int) singleValued.longValue(), 1);
}
}
} else {
while (it.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
for (int i = 0; i < dv.docValueCount(); i++) {
sparseValues.addTo((int) dv.nextValue(), 1);
if (values != null) {
while (it.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
for (int i = 0; i < multiValued.docValueCount(); i++) {
values[(int) multiValued.nextValue()]++;
}
}
} else {
while (it.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
for (int i = 0; i < multiValued.docValueCount(); i++) {
sparseValues.addTo((int) multiValued.nextValue(), 1);
}
}
}
}
@@ -100,40 +116,45 @@ public class FastTaxonomyFacetCounts extends IntTaxonomyFacets {
private final void countAll(IndexReader reader) throws IOException {
assert values != null;
for (LeafReaderContext context : reader.leaves()) {
SortedNumericDocValues dv = context.reader().getSortedNumericDocValues(indexFieldName);
if (dv == null) {
SortedNumericDocValues multiValued =
context.reader().getSortedNumericDocValues(indexFieldName);
if (multiValued == null) {
continue;
}
Bits liveDocs = context.reader().getLiveDocs();
NumericDocValues ndv = DocValues.unwrapSingleton(dv);
NumericDocValues singleValued = DocValues.unwrapSingleton(multiValued);
if (ndv != null) {
if (singleValued != null) {
if (liveDocs == null) {
while (ndv.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
values[(int) ndv.longValue()]++;
while (singleValued.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
values[(int) singleValued.longValue()]++;
}
} else {
for (int doc = ndv.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = ndv.nextDoc()) {
for (int doc = singleValued.nextDoc();
doc != DocIdSetIterator.NO_MORE_DOCS;
doc = singleValued.nextDoc()) {
if (liveDocs.get(doc)) {
values[(int) ndv.longValue()]++;
values[(int) singleValued.longValue()]++;
}
}
}
} else {
if (liveDocs == null) {
while (dv.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
final int dvCount = dv.docValueCount();
while (multiValued.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
final int dvCount = multiValued.docValueCount();
for (int i = 0; i < dvCount; i++) {
values[(int) dv.nextValue()]++;
values[(int) multiValued.nextValue()]++;
}
}
} else {
for (int doc = dv.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = dv.nextDoc()) {
for (int doc = multiValued.nextDoc();
doc != DocIdSetIterator.NO_MORE_DOCS;
doc = multiValued.nextDoc()) {
if (liveDocs.get(doc)) {
final int dvCount = dv.docValueCount();
final int dvCount = multiValued.docValueCount();
for (int i = 0; i < dvCount; i++) {
values[(int) dv.nextValue()]++;
values[(int) multiValued.nextValue()]++;
}
}
}