mirror of https://github.com/apache/lucene.git
LUCENE-7506: FastTaxonomyFacetCounts use ConjunctionDISI so cost is in proportion to size of intersected set of documents
This commit is contained in:
parent
731c5f9316
commit
d03cc92b22
|
@ -40,6 +40,11 @@ Optimizations
|
|||
in the sets of SHOULD and FILTER clauses, or both in MUST/FILTER and MUST_NOT
|
||||
clauses. (Spyros Kapnissis via Adrien Grand, Uwe Schindler)
|
||||
|
||||
* LUCENE-7506: FastTaxonomyFacetCounts should use CPU in proportion to
|
||||
the size of the intersected set of hits from the query and documents
|
||||
that have a facet value, so sparse faceting works as expected
|
||||
(Adrien Grand via Mike McCandless)
|
||||
|
||||
Other
|
||||
|
||||
* LUCENE-7328: Remove LegacyNumericEncoding from GeoPointField. (Nick Knize)
|
||||
|
|
|
@ -17,12 +17,14 @@
|
|||
package org.apache.lucene.facet.taxonomy;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.facet.FacetsCollector.MatchingDocs;
|
||||
import org.apache.lucene.facet.FacetsCollector;
|
||||
import org.apache.lucene.facet.FacetsConfig;
|
||||
import org.apache.lucene.index.BinaryDocValues;
|
||||
import org.apache.lucene.search.ConjunctionDISI;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
|
@ -55,29 +57,24 @@ public class FastTaxonomyFacetCounts extends IntTaxonomyFacets {
|
|||
continue;
|
||||
}
|
||||
|
||||
DocIdSetIterator docs = hits.bits.iterator();
|
||||
DocIdSetIterator it = ConjunctionDISI.intersectIterators(Arrays.asList(
|
||||
hits.bits.iterator(), dv));
|
||||
|
||||
int doc;
|
||||
while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
if (dv.docID() < doc) {
|
||||
dv.advance(doc);
|
||||
}
|
||||
if (dv.docID() == doc) {
|
||||
final BytesRef bytesRef = dv.binaryValue();
|
||||
byte[] bytes = bytesRef.bytes;
|
||||
int end = bytesRef.offset + bytesRef.length;
|
||||
int ord = 0;
|
||||
int offset = bytesRef.offset;
|
||||
int prev = 0;
|
||||
while (offset < end) {
|
||||
byte b = bytes[offset++];
|
||||
if (b >= 0) {
|
||||
prev = ord = ((ord << 7) | b) + prev;
|
||||
++values[ord];
|
||||
ord = 0;
|
||||
} else {
|
||||
ord = (ord << 7) | (b & 0x7F);
|
||||
}
|
||||
for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
|
||||
final BytesRef bytesRef = dv.binaryValue();
|
||||
byte[] bytes = bytesRef.bytes;
|
||||
int end = bytesRef.offset + bytesRef.length;
|
||||
int ord = 0;
|
||||
int offset = bytesRef.offset;
|
||||
int prev = 0;
|
||||
while (offset < end) {
|
||||
byte b = bytes[offset++];
|
||||
if (b >= 0) {
|
||||
prev = ord = ((ord << 7) | b) + prev;
|
||||
++values[ord];
|
||||
ord = 0;
|
||||
} else {
|
||||
ord = (ord << 7) | (b & 0x7F);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue