mirror of https://github.com/apache/lucene.git
SOLR-5512: Optimize DocValuesFacets
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1546675 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
7bae4083e1
commit
cafdce601c
|
@ -174,6 +174,8 @@ Optimizations
|
|||
* SOLR-5189: Solr 4.x Web UI Log Viewer does not display 'date' column from
|
||||
logs (steffkes)
|
||||
|
||||
* SOLR-5512: Optimize DocValuesFacets. (Robert Muir)
|
||||
|
||||
Other Changes
|
||||
---------------------
|
||||
|
||||
|
|
|
@ -232,9 +232,20 @@ public class DocValuesFacets {
|
|||
return res;
|
||||
}
|
||||
|
||||
/** accumulates per-segment single-valued facet counts, mapping to global ordinal space */
|
||||
// specialized since the single-valued case is different
|
||||
/** accumulates per-segment single-valued facet counts */
|
||||
static void accumSingle(int counts[], int startTermIndex, SortedDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
|
||||
if (startTermIndex == -1 && (map == null || si.getValueCount() < disi.cost()*10)) {
|
||||
// no prefixing, not too many unique values wrt matching docs (lucene/facets heuristic):
|
||||
// collect separately per-segment, then map to global ords
|
||||
accumSingleSeg(counts, si, disi, subIndex, map);
|
||||
} else {
|
||||
// otherwise: do collect+map on the fly
|
||||
accumSingleGeneric(counts, startTermIndex, si, disi, subIndex, map);
|
||||
}
|
||||
}
|
||||
|
||||
/** accumulates per-segment single-valued facet counts, mapping to global ordinal space on-the-fly */
|
||||
static void accumSingleGeneric(int counts[], int startTermIndex, SortedDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
|
||||
int doc;
|
||||
while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
int term = si.getOrd(doc);
|
||||
|
@ -246,8 +257,41 @@ public class DocValuesFacets {
|
|||
}
|
||||
}
|
||||
|
||||
/** accumulates per-segment multi-valued facet counts, mapping to global ordinal space */
|
||||
/** "typical" single-valued faceting: not too many unique values, no prefixing. maps to global ordinals as a separate step */
|
||||
static void accumSingleSeg(int counts[], SortedDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
|
||||
// First count in seg-ord space:
|
||||
final int segCounts[];
|
||||
if (map == null) {
|
||||
segCounts = counts;
|
||||
} else {
|
||||
segCounts = new int[1+si.getValueCount()];
|
||||
}
|
||||
|
||||
int doc;
|
||||
while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
segCounts[1+si.getOrd(doc)]++;
|
||||
}
|
||||
|
||||
// migrate to global ords (if necessary)
|
||||
if (map != null) {
|
||||
migrateGlobal(counts, segCounts, subIndex, map);
|
||||
}
|
||||
}
|
||||
|
||||
/** accumulates per-segment multi-valued facet counts */
|
||||
static void accumMulti(int counts[], int startTermIndex, SortedSetDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
|
||||
if (startTermIndex == -1 && (map == null || si.getValueCount() < disi.cost()*10)) {
|
||||
// no prefixing, not too many unique values wrt matching docs (lucene/facets heuristic):
|
||||
// collect separately per-segment, then map to global ords
|
||||
accumMultiSeg(counts, si, disi, subIndex, map);
|
||||
} else {
|
||||
// otherwise: do collect+map on the fly
|
||||
accumMultiGeneric(counts, startTermIndex, si, disi, subIndex, map);
|
||||
}
|
||||
}
|
||||
|
||||
/** accumulates per-segment multi-valued facet counts, mapping to global ordinal space on-the-fly */
|
||||
static void accumMultiGeneric(int counts[], int startTermIndex, SortedSetDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
|
||||
int doc;
|
||||
while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
si.setDocument(doc);
|
||||
|
@ -269,4 +313,47 @@ public class DocValuesFacets {
|
|||
} while ((term = (int) si.nextOrd()) >= 0);
|
||||
}
|
||||
}
|
||||
|
||||
/** "typical" multi-valued faceting: not too many unique values, no prefixing. maps to global ordinals as a separate step */
|
||||
static void accumMultiSeg(int counts[], SortedSetDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
|
||||
// First count in seg-ord space:
|
||||
final int segCounts[];
|
||||
if (map == null) {
|
||||
segCounts = counts;
|
||||
} else {
|
||||
segCounts = new int[1+(int)si.getValueCount()];
|
||||
}
|
||||
|
||||
int doc;
|
||||
while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
si.setDocument(doc);
|
||||
int term = (int) si.nextOrd();
|
||||
if (term < 0) {
|
||||
counts[0]++; // missing
|
||||
} else {
|
||||
do {
|
||||
segCounts[1+term]++;
|
||||
} while ((term = (int)si.nextOrd()) >= 0);
|
||||
}
|
||||
}
|
||||
|
||||
// migrate to global ords (if necessary)
|
||||
if (map != null) {
|
||||
migrateGlobal(counts, segCounts, subIndex, map);
|
||||
}
|
||||
}
|
||||
|
||||
/** folds counts in segment ordinal space (segCounts) into global ordinal space (counts) */
|
||||
static void migrateGlobal(int counts[], int segCounts[], int subIndex, OrdinalMap map) {
|
||||
// missing count
|
||||
counts[0] += segCounts[0];
|
||||
|
||||
// migrate actual ordinals
|
||||
for (int ord = 1; ord < segCounts.length; ord++) {
|
||||
int count = segCounts[ord];
|
||||
if (count != 0) {
|
||||
counts[1+(int) map.getGlobalOrd(subIndex, ord-1)] += count;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -566,7 +566,7 @@ public class SimpleFacets {
|
|||
throw se;
|
||||
} catch (Exception e) {
|
||||
throw new SolrException(ErrorCode.SERVER_ERROR,
|
||||
"Exception during facet.field: " + workerFacetValue, e.getCause());
|
||||
"Exception during facet.field: " + workerFacetValue, e);
|
||||
} finally {
|
||||
semaphore.release();
|
||||
}
|
||||
|
|
|
@ -296,11 +296,11 @@ public class BitDocSet extends DocSetBase {
|
|||
@Override
|
||||
public long cost() {
|
||||
// we don't want to actually compute cardinality, but
|
||||
// if its already been computed, we use it
|
||||
// if its already been computed, we use it (pro-rated for the segment)
|
||||
if (size != -1) {
|
||||
return size;
|
||||
return (long)(size * ((OpenBitSet.bits2words(maxDoc)<<6) / (float)bs.capacity()));
|
||||
} else {
|
||||
return bs.capacity();
|
||||
return maxDoc;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue