From cafdce601cd20bd27186c78cca76d95ec3eaecac Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Sat, 30 Nov 2013 01:32:26 +0000 Subject: [PATCH] SOLR-5512: Optimize DocValuesFacets git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1546675 13f79535-47bb-0310-9956-ffa450edef68 --- solr/CHANGES.txt | 2 + .../apache/solr/request/DocValuesFacets.java | 93 ++++++++++++++++++- .../org/apache/solr/request/SimpleFacets.java | 2 +- .../org/apache/solr/search/BitDocSet.java | 6 +- 4 files changed, 96 insertions(+), 7 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 1138c397b21..e81dc7941e0 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -174,6 +174,8 @@ Optimizations * SOLR-5189: Solr 4.x Web UI Log Viewer does not display 'date' column from logs (steffkes) +* SOLR-5512: Optimize DocValuesFacets. (Robert Muir) + Other Changes --------------------- diff --git a/solr/core/src/java/org/apache/solr/request/DocValuesFacets.java b/solr/core/src/java/org/apache/solr/request/DocValuesFacets.java index 98a2140759c..81c04f87cde 100644 --- a/solr/core/src/java/org/apache/solr/request/DocValuesFacets.java +++ b/solr/core/src/java/org/apache/solr/request/DocValuesFacets.java @@ -232,9 +232,20 @@ public class DocValuesFacets { return res; } - /** accumulates per-segment single-valued facet counts, mapping to global ordinal space */ - // specialized since the single-valued case is different + /** accumulates per-segment single-valued facet counts */ static void accumSingle(int counts[], int startTermIndex, SortedDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException { + if (startTermIndex == -1 && (map == null || si.getValueCount() < disi.cost()*10)) { + // no prefixing, not too many unique values wrt matching docs (lucene/facets heuristic): + // collect separately per-segment, then map to global ords + accumSingleSeg(counts, si, disi, subIndex, map); + } else { + // otherwise: do collect+map on the fly + accumSingleGeneric(counts, startTermIndex, si, disi, subIndex, map); + } + } + + /** accumulates per-segment single-valued facet counts, mapping to global ordinal space on-the-fly */ + static void accumSingleGeneric(int counts[], int startTermIndex, SortedDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException { int doc; while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { int term = si.getOrd(doc); @@ -246,8 +257,41 @@ public class DocValuesFacets { } } - /** accumulates per-segment multi-valued facet counts, mapping to global ordinal space */ + /** "typical" single-valued faceting: not too many unique values, no prefixing. maps to global ordinals as a separate step */ + static void accumSingleSeg(int counts[], SortedDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException { + // First count in seg-ord space: + final int segCounts[]; + if (map == null) { + segCounts = counts; + } else { + segCounts = new int[1+si.getValueCount()]; + } + + int doc; + while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + segCounts[1+si.getOrd(doc)]++; + } + + // migrate to global ords (if necessary) + if (map != null) { + migrateGlobal(counts, segCounts, subIndex, map); + } + } + + /** accumulates per-segment multi-valued facet counts */ static void accumMulti(int counts[], int startTermIndex, SortedSetDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException { + if (startTermIndex == -1 && (map == null || si.getValueCount() < disi.cost()*10)) { + // no prefixing, not too many unique values wrt matching docs (lucene/facets heuristic): + // collect separately per-segment, then map to global ords + accumMultiSeg(counts, si, disi, subIndex, map); + } else { + // otherwise: do collect+map on the fly + accumMultiGeneric(counts, startTermIndex, si, disi, subIndex, map); + } + } + + /** accumulates per-segment multi-valued facet counts, mapping to global ordinal space on-the-fly */ + static void accumMultiGeneric(int counts[], int startTermIndex, SortedSetDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException { int doc; while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { si.setDocument(doc); @@ -269,4 +313,47 @@ public class DocValuesFacets { } while ((term = (int) si.nextOrd()) >= 0); } } + + /** "typical" multi-valued faceting: not too many unique values, no prefixing. maps to global ordinals as a separate step */ + static void accumMultiSeg(int counts[], SortedSetDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException { + // First count in seg-ord space: + final int segCounts[]; + if (map == null) { + segCounts = counts; + } else { + segCounts = new int[1+(int)si.getValueCount()]; + } + + int doc; + while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + si.setDocument(doc); + int term = (int) si.nextOrd(); + if (term < 0) { + counts[0]++; // missing + } else { + do { + segCounts[1+term]++; + } while ((term = (int)si.nextOrd()) >= 0); + } + } + + // migrate to global ords (if necessary) + if (map != null) { + migrateGlobal(counts, segCounts, subIndex, map); + } + } + + /** folds counts in segment ordinal space (segCounts) into global ordinal space (counts) */ + static void migrateGlobal(int counts[], int segCounts[], int subIndex, OrdinalMap map) { + // missing count + counts[0] += segCounts[0]; + + // migrate actual ordinals + for (int ord = 1; ord < segCounts.length; ord++) { + int count = segCounts[ord]; + if (count != 0) { + counts[1+(int) map.getGlobalOrd(subIndex, ord-1)] += count; + } + } + } } diff --git a/solr/core/src/java/org/apache/solr/request/SimpleFacets.java b/solr/core/src/java/org/apache/solr/request/SimpleFacets.java index f1d2965eafa..1fdeec23198 100644 --- a/solr/core/src/java/org/apache/solr/request/SimpleFacets.java +++ b/solr/core/src/java/org/apache/solr/request/SimpleFacets.java @@ -566,7 +566,7 @@ public class SimpleFacets { throw se; } catch (Exception e) { throw new SolrException(ErrorCode.SERVER_ERROR, - "Exception during facet.field: " + workerFacetValue, e.getCause()); + "Exception during facet.field: " + workerFacetValue, e); } finally { semaphore.release(); } diff --git a/solr/core/src/java/org/apache/solr/search/BitDocSet.java b/solr/core/src/java/org/apache/solr/search/BitDocSet.java index 0346ab71a57..f793982a9ff 100644 --- a/solr/core/src/java/org/apache/solr/search/BitDocSet.java +++ b/solr/core/src/java/org/apache/solr/search/BitDocSet.java @@ -296,11 +296,11 @@ public class BitDocSet extends DocSetBase { @Override public long cost() { // we don't want to actually compute cardinality, but - // if its already been computed, we use it + // if its already been computed, we use it (pro-rated for the segment) if (size != -1) { - return size; + return (long)(size * ((OpenBitSet.bits2words(maxDoc)<<6) / (float)bs.capacity())); } else { - return bs.capacity(); + return maxDoc; } } };