SOLR-5512: Optimize DocValuesFacets

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1546675 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2013-11-30 01:32:26 +00:00
parent 7bae4083e1
commit cafdce601c
4 changed files with 96 additions and 7 deletions

View File

@ -174,6 +174,8 @@ Optimizations
* SOLR-5189: Solr 4.x Web UI Log Viewer does not display 'date' column from
logs (steffkes)
* SOLR-5512: Optimize DocValuesFacets. (Robert Muir)
Other Changes
---------------------

View File

@ -232,9 +232,20 @@ public class DocValuesFacets {
return res;
}
/** accumulates per-segment single-valued facet counts, mapping to global ordinal space */
// specialized since the single-valued case is different
/**
 * Accumulates single-valued facet counts for one segment, picking one of two strategies.
 * <p>
 * The per-segment strategy ({@code accumSingleSeg}) is used only when no prefix is in effect
 * ({@code startTermIndex == -1}) and either there is no ordinal map, or the segment's unique
 * value count is below ten times {@code disi.cost()}. Every other combination is handled by
 * {@code accumSingleGeneric}, which collects and maps to global ordinals on the fly.
 */
static void accumSingle(int counts[], int startTermIndex, SortedDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
  if (startTermIndex != -1) {
    // prefixing: the per-segment variant takes no startTermIndex, so go generic
    accumSingleGeneric(counts, startTermIndex, si, disi, subIndex, map);
    return;
  }
  if (map != null && si.getValueCount() >= disi.cost() * 10) {
    // too many unique values wrt matching docs (lucene/facets heuristic):
    // a separate per-segment count array would not pay off
    accumSingleGeneric(counts, startTermIndex, si, disi, subIndex, map);
    return;
  }
  // typical case: count per-segment first, then map to global ords
  accumSingleSeg(counts, si, disi, subIndex, map);
}
/** accumulates per-segment single-valued facet counts, mapping to global ordinal space on-the-fly */
static void accumSingleGeneric(int counts[], int startTermIndex, SortedDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
int doc;
while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
int term = si.getOrd(doc);
@ -246,8 +257,41 @@ public class DocValuesFacets {
}
}
/** accumulates per-segment multi-valued facet counts, mapping to global ordinal space */
/**
 * "Typical" single-valued faceting: no prefixing and not too many unique values.
 * <p>
 * Every document produced by {@code disi} is tallied at slot {@code 1 + ord}, so slot 0
 * receives ordinal -1 (presumably documents without a value; confirm against
 * {@code SortedDocValues.getOrd}). With no ordinal map the tally goes straight into
 * {@code counts}; otherwise a scratch array sized {@code 1 + si.getValueCount()} is filled
 * and then folded into {@code counts} by {@code migrateGlobal}.
 */
static void accumSingleSeg(int counts[], SortedDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
  // Pick the array that is incremented while iterating (segment-ordinal space).
  final int[] perSegment = (map == null) ? counts : new int[1 + si.getValueCount()];

  for (int docId = disi.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = disi.nextDoc()) {
    perSegment[1 + si.getOrd(docId)]++;
  }

  // Only a separate scratch array needs translating into global ordinals.
  if (map != null) {
    migrateGlobal(counts, perSegment, subIndex, map);
  }
}
/**
 * Accumulates multi-valued facet counts for one segment, picking one of two strategies.
 * <p>
 * The per-segment strategy ({@code accumMultiSeg}) is used only when no prefix is in effect
 * ({@code startTermIndex == -1}) and either there is no ordinal map, or the segment's unique
 * value count is below ten times {@code disi.cost()}. Every other combination is handled by
 * {@code accumMultiGeneric}, which collects and maps to global ordinals on the fly.
 */
static void accumMulti(int counts[], int startTermIndex, SortedSetDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
  if (startTermIndex != -1) {
    // prefixing: the per-segment variant takes no startTermIndex, so go generic
    accumMultiGeneric(counts, startTermIndex, si, disi, subIndex, map);
    return;
  }
  if (map != null && si.getValueCount() >= disi.cost() * 10) {
    // too many unique values wrt matching docs (lucene/facets heuristic):
    // a separate per-segment count array would not pay off
    accumMultiGeneric(counts, startTermIndex, si, disi, subIndex, map);
    return;
  }
  // typical case: count per-segment first, then map to global ords
  accumMultiSeg(counts, si, disi, subIndex, map);
}
/** accumulates per-segment multi-valued facet counts, mapping to global ordinal space on-the-fly */
static void accumMultiGeneric(int counts[], int startTermIndex, SortedSetDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
int doc;
while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
si.setDocument(doc);
@ -269,4 +313,47 @@ public class DocValuesFacets {
} while ((term = (int) si.nextOrd()) >= 0);
}
}
/**
 * "Typical" multi-valued faceting: no prefixing and not too many unique values.
 * <p>
 * For each document produced by {@code disi}, every ordinal returned by {@code si} is tallied
 * at slot {@code 1 + ord}. A document whose first ordinal is negative is counted as missing,
 * directly in {@code counts[0]}. With no ordinal map the tally array is {@code counts} itself;
 * otherwise a scratch array sized {@code 1 + si.getValueCount()} is filled and then folded
 * into {@code counts} by {@code migrateGlobal}.
 */
static void accumMultiSeg(int counts[], SortedSetDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
  // Pick the array that is incremented while iterating (segment-ordinal space).
  final int[] perSegment = (map == null) ? counts : new int[1 + (int) si.getValueCount()];

  for (int docId = disi.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = disi.nextDoc()) {
    si.setDocument(docId);
    int ord = (int) si.nextOrd();
    if (ord < 0) {
      // no ordinals at all for this document: missing
      counts[0]++;
      continue;
    }
    // at least one ordinal: consume them until the negative end marker
    while (ord >= 0) {
      perSegment[1 + ord]++;
      ord = (int) si.nextOrd();
    }
  }

  // Only a separate scratch array needs translating into global ordinals.
  if (map != null) {
    migrateGlobal(counts, perSegment, subIndex, map);
  }
}
/**
 * Folds counts held in segment-ordinal space ({@code segCounts}) into global-ordinal space
 * ({@code counts}).
 * <p>
 * Both arrays are shifted by one: slot 0 is the missing count, and slot {@code 1 + ord} holds
 * the count for ordinal {@code ord}. Segment ordinals are translated with
 * {@code map.getGlobalOrd(subIndex, segOrd)}; zero counts are skipped without a lookup.
 */
static void migrateGlobal(int counts[], int segCounts[], int subIndex, OrdinalMap map) {
  // the missing bucket needs no translation
  counts[0] += segCounts[0];

  // walk the real segment ordinals; their counts live one slot to the right
  final int segOrdCount = segCounts.length - 1;
  for (int segOrd = 0; segOrd < segOrdCount; segOrd++) {
    final int hits = segCounts[segOrd + 1];
    if (hits == 0) {
      continue; // nothing to move, avoid the ordinal lookup
    }
    final int globalOrd = (int) map.getGlobalOrd(subIndex, segOrd);
    counts[1 + globalOrd] += hits;
  }
}
}

View File

@ -566,7 +566,7 @@ public class SimpleFacets {
throw se;
} catch (Exception e) {
throw new SolrException(ErrorCode.SERVER_ERROR,
"Exception during facet.field: " + workerFacetValue, e.getCause());
"Exception during facet.field: " + workerFacetValue, e);
} finally {
semaphore.release();
}

View File

@ -296,11 +296,11 @@ public class BitDocSet extends DocSetBase {
@Override
public long cost() {
// we don't want to actually compute cardinality, but
// if it's already been computed, we use it
// if it's already been computed, we use it (pro-rated for the segment)
if (size != -1) {
return size;
return (long)(size * ((OpenBitSet.bits2words(maxDoc)<<6) / (float)bs.capacity()));
} else {
return bs.capacity();
return maxDoc;
}
}
};