LUCENE-7506: FastTaxonomyFacetCounts use ConjunctionDISI so cost is in proportion to size of intersected set of documents

This commit is contained in:
Mike McCandless 2016-10-19 10:04:39 -04:00
parent 731c5f9316
commit d03cc92b22
2 changed files with 24 additions and 22 deletions

View File

@ -40,6 +40,11 @@ Optimizations
in the sets of SHOULD and FILTER clauses, or both in MUST/FILTER and MUST_NOT
clauses. (Spyros Kapnissis via Adrien Grand, Uwe Schindler)
* LUCENE-7506: FastTaxonomyFacetCounts should use CPU in proportion to
the size of the intersected set of hits from the query and documents
that have a facet value, so sparse faceting works as expected
(Adrien Grand via Mike McCandless)
Other
* LUCENE-7328: Remove LegacyNumericEncoding from GeoPointField. (Nick Knize)

View File

@ -17,12 +17,14 @@
package org.apache.lucene.facet.taxonomy;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import org.apache.lucene.facet.FacetsCollector.MatchingDocs;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.search.ConjunctionDISI;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;
@ -55,29 +57,24 @@ public class FastTaxonomyFacetCounts extends IntTaxonomyFacets {
continue;
}
DocIdSetIterator docs = hits.bits.iterator();
DocIdSetIterator it = ConjunctionDISI.intersectIterators(Arrays.asList(
hits.bits.iterator(), dv));
int doc;
while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
if (dv.docID() < doc) {
dv.advance(doc);
}
if (dv.docID() == doc) {
final BytesRef bytesRef = dv.binaryValue();
byte[] bytes = bytesRef.bytes;
int end = bytesRef.offset + bytesRef.length;
int ord = 0;
int offset = bytesRef.offset;
int prev = 0;
while (offset < end) {
byte b = bytes[offset++];
if (b >= 0) {
prev = ord = ((ord << 7) | b) + prev;
++values[ord];
ord = 0;
} else {
ord = (ord << 7) | (b & 0x7F);
}
for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
final BytesRef bytesRef = dv.binaryValue();
byte[] bytes = bytesRef.bytes;
int end = bytesRef.offset + bytesRef.length;
int ord = 0;
int offset = bytesRef.offset;
int prev = 0;
while (offset < end) {
byte b = bytes[offset++];
if (b >= 0) {
prev = ord = ((ord << 7) | b) + prev;
++values[ord];
ord = 0;
} else {
ord = (ord << 7) | (b & 0x7F);
}
}
}