From fbd467ad766829c4151cea6a3a51f8bdcbcbff00 Mon Sep 17 00:00:00 2001 From: Yonik Seeley Date: Wed, 22 Jul 2015 17:54:36 +0000 Subject: [PATCH] SOLR-7446: simplify missing bucket handling git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1692304 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/solr/search/facet/FacetField.java | 48 ++----------------- .../facet/FacetFieldProcessorNumeric.java | 32 ++----------- .../solr/search/facet/FacetProcessor.java | 7 +-- .../apache/solr/search/facet/FacetQuery.java | 2 +- .../apache/solr/search/facet/FacetRange.java | 2 +- .../solr/search/facet/UnInvertedField.java | 2 +- 6 files changed, 17 insertions(+), 76 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetField.java b/solr/core/src/java/org/apache/solr/search/facet/FacetField.java index 420bc4ca8db..389ebd7187e 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/FacetField.java +++ b/solr/core/src/java/org/apache/solr/search/facet/FacetField.java @@ -53,7 +53,6 @@ import org.apache.solr.common.util.SimpleOrderedMap; import org.apache.solr.schema.FieldType; import org.apache.solr.schema.SchemaField; import org.apache.solr.schema.TrieField; -import org.apache.solr.search.DocIterator; import org.apache.solr.search.DocSet; import org.apache.solr.search.HashDocSet; import org.apache.solr.search.SolrIndexSearcher; @@ -174,7 +173,6 @@ abstract class FacetFieldProcessor extends FacetProcessor { SlotAcc[] otherAccs; // Accumulators that do not need to be calculated across all buckets. SpecialSlotAcc allBucketsAcc; // this can internally refer to otherAccs and/or collectAcc. setNextReader should be called on otherAccs directly if they exist. - SpecialSlotAcc missingAcc; // this can internally refer to otherAccs and/or collectAcc. setNextReader should be called on otherAccs directly if they exist. FacetFieldProcessor(FacetContext fcontext, FacetField freq, SchemaField sf) { @@ -502,7 +500,6 @@ abstract class FacetFieldProcessorFCBase extends FacetFieldProcessor { int maxSlots; int allBucketsSlot = -1; // slot for the primary Accs (countAcc, collectAcc) - int missingSlot = -1; public FacetFieldProcessorFCBase(FacetContext fcontext, FacetField freq, SchemaField sf) { super(fcontext, freq, sf); @@ -538,9 +535,6 @@ abstract class FacetFieldProcessorFCBase extends FacetFieldProcessor { if (freq.allBuckets) { allBucketsSlot = maxSlots++; } - if (freq.missing) { - missingSlot = maxSlots++; - } createCollectAcc(nDocs, maxSlots); @@ -548,11 +542,6 @@ abstract class FacetFieldProcessorFCBase extends FacetFieldProcessor { allBucketsAcc = new SpecialSlotAcc(fcontext, collectAcc, allBucketsSlot, otherAccs, 0); } - if (freq.missing) { - // TODO: optimize case when missingSlot can be contiguous with other slots - missingAcc = new SpecialSlotAcc(fcontext, collectAcc, missingSlot, otherAccs, 1); - } - collectDocs(); return findTopSlots(); @@ -587,7 +576,7 @@ abstract class FacetFieldProcessorFCBase extends FacetFieldProcessor { }; Slot bottom = null; - for (int i = (startTermIndex == -1) ? 1 : 0; i < nTerms; i++) { + for (int i = 0; i < nTerms; i++) { // screen out buckets not matching mincount immediately (i.e. don't even increment numBuckets) if (effectiveMincount > 0 && countAcc.getCount(i) < effectiveMincount) { continue; @@ -672,29 +661,8 @@ abstract class FacetFieldProcessorFCBase extends FacetFieldProcessor { if (freq.missing) { SimpleOrderedMap missingBucket = new SimpleOrderedMap<>(); - fillBucket(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field)); + fillBucket(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field), null); res.add("missing", missingBucket); - - /*** TODO - OPTIMIZE - DocSet missingDocSet = null; - if (startTermIndex == -1) { - fillBucket(missingBucket, countAcc.getCount(0), null); - } else { - missingDocSet = getFieldMissing(fcontext.searcher, fcontext.base, freq.field); - // an extra slot was added to the end for this missing bucket - countAcc.incrementCount(nTerms, missingDocSet.size()); - collect(missingDocSet, nTerms); - addStats(missingBucket, nTerms); - } - - if (freq.getSubFacets().size() > 0) { - // TODO: we can do better than this! - if (missingDocSet == null) { - missingDocSet = getFieldMissing(fcontext.searcher, fcontext.base, freq.field); - } - processSubs(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field), missingDocSet); - } - ***/ } return res; @@ -751,9 +719,6 @@ class FacetFieldProcessorDV extends FacetFieldProcessorFCBase { endTermIndex = (int)si.getValueCount(); } - // optimize collecting the "missing" bucket when startTermindex is 0 (since the "missing" ord is -1) - startTermIndex = startTermIndex==0 && freq.missing ? -1 : startTermIndex; - nTerms = endTermIndex - startTermIndex; } @@ -809,6 +774,7 @@ class FacetFieldProcessorDV extends FacetFieldProcessorFCBase { int doc; while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { int segOrd = singleDv.getOrd(doc); + if (segOrd < 0) continue; collect(doc, segOrd, toGlobal); } } @@ -817,11 +783,8 @@ class FacetFieldProcessorDV extends FacetFieldProcessorFCBase { int doc; while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { multiDv.setDocument(doc); - int segOrd = (int)multiDv.nextOrd(); - collect(doc, segOrd, toGlobal); // collect anything the first time (even -1 for missing) - if (segOrd < 0) continue; for(;;) { - segOrd = (int)multiDv.nextOrd(); + int segOrd = (int)multiDv.nextOrd(); if (segOrd < 0) break; collect(doc, segOrd, toGlobal); } @@ -837,8 +800,7 @@ class FacetFieldProcessorDV extends FacetFieldProcessorFCBase { if (collectAcc != null) { collectAcc.collect(doc, arrIdx); } - // since this can be called for missing, we need to ensure it's currently not. - if (allBucketsAcc != null && ord >= 0) { + if (allBucketsAcc != null) { allBucketsAcc.collect(doc, arrIdx); } } diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorNumeric.java b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorNumeric.java index 6d02d8e1178..ecce7c88f2c 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorNumeric.java +++ b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorNumeric.java @@ -32,6 +32,7 @@ import org.apache.lucene.util.PriorityQueue; import org.apache.solr.common.util.SimpleOrderedMap; import org.apache.solr.schema.SchemaField; import org.apache.solr.search.DocIterator; +import org.apache.solr.search.DocSetCollector; class FacetFieldProcessorNumeric extends FacetFieldProcessor { static int MAXIMUM_STARTING_TABLE_SIZE=1024; // must be a power of two, non-final to support setting by tests @@ -137,10 +138,8 @@ class FacetFieldProcessorNumeric extends FacetFieldProcessor { super(fcontext, freq, sf); } - int missingSlot = -1; int allBucketsSlot = -1; - @Override public void process() throws IOException { super.process(); @@ -148,18 +147,14 @@ class FacetFieldProcessorNumeric extends FacetFieldProcessor { } private void doRehash(LongCounts table) { - if (collectAcc == null && missingAcc == null && allBucketsAcc == null) return; + if (collectAcc == null && allBucketsAcc == null) return; // Our "count" acc is backed by the hash table and will already be rehashed // otherAccs don't need to be rehashed int newTableSize = table.numSlots(); int numSlots = newTableSize; - final int oldMissingSlot = missingSlot; final int oldAllBucketsSlot = allBucketsSlot; - if (oldMissingSlot >= 0) { - missingSlot = numSlots++; - } if (oldAllBucketsSlot >= 0) { allBucketsSlot = numSlots++; } @@ -178,9 +173,6 @@ class FacetFieldProcessorNumeric extends FacetFieldProcessor { if (oldSlot < mapping.length) { return mapping[oldSlot]; } - if (oldSlot == oldMissingSlot) { - return missingSlot; - } if (oldSlot == oldAllBucketsSlot) { return allBucketsSlot; } @@ -192,9 +184,6 @@ class FacetFieldProcessorNumeric extends FacetFieldProcessor { if (collectAcc != null) { collectAcc.resize(resizer); } - if (missingAcc != null) { - missingAcc.resize(resizer); - } if (allBucketsAcc != null) { allBucketsAcc.resize(resizer); } @@ -225,9 +214,7 @@ class FacetFieldProcessorNumeric extends FacetFieldProcessor { int numMissing = 0; - if (freq.missing) { - missingSlot = numSlots++; - } + if (freq.allBuckets) { allBucketsSlot = numSlots++; } @@ -302,11 +289,6 @@ class FacetFieldProcessorNumeric extends FacetFieldProcessor { allBucketsAcc = new SpecialSlotAcc(fcontext, collectAcc, allBucketsSlot, otherAccs, 0); } - if (freq.missing) { - // TODO: optimize case when missingSlot can be contiguous with other slots - missingAcc = new SpecialSlotAcc(fcontext, collectAcc, missingSlot, otherAccs, 1); - } - NumericDocValues values = null; Bits docsWithField = null; @@ -335,11 +317,7 @@ class FacetFieldProcessorNumeric extends FacetFieldProcessor { int segDoc = doc - segBase; long val = values.get(segDoc); - if (val == 0 && !docsWithField.get(segDoc)) { - if (missingAcc != null) { - missingAcc.collect(segDoc, -1); - } - } else { + if (val != 0 && docsWithField.get(segDoc)) { int slot = table.add(val); // this can trigger a rehash rehash // countAcc.incrementCount(slot, 1); @@ -428,7 +406,7 @@ class FacetFieldProcessorNumeric extends FacetFieldProcessor { // TODO: it would be more efficient to buid up a missing DocSet if we need it here anyway. SimpleOrderedMap missingBucket = new SimpleOrderedMap<>(); - fillBucket(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field)); + fillBucket(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field), null); res.add("missing", missingBucket); } diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetProcessor.java b/solr/core/src/java/org/apache/solr/search/facet/FacetProcessor.java index 14983e737e1..a5645b9f942 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/FacetProcessor.java +++ b/solr/core/src/java/org/apache/solr/search/facet/FacetProcessor.java @@ -283,15 +283,16 @@ public class FacetProcessor { } - public void fillBucket(SimpleOrderedMap bucket, Query q) throws IOException { + public void fillBucket(SimpleOrderedMap bucket, Query q, DocSet result) throws IOException { boolean needDocSet = freq.getFacetStats().size() > 0 || freq.getSubFacets().size() > 0; // TODO: always collect counts or not??? - DocSet result = null; int count; - if (needDocSet) { + if (result != null) { + count = result.size(); + } else if (needDocSet) { if (q == null) { result = fcontext.base; // result.incref(); // OFF-HEAP diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetQuery.java b/solr/core/src/java/org/apache/solr/search/facet/FacetQuery.java index 909a5ee6920..3341d306e14 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/FacetQuery.java +++ b/solr/core/src/java/org/apache/solr/search/facet/FacetQuery.java @@ -54,7 +54,7 @@ class FacetQueryProcessor extends FacetProcessor { public void process() throws IOException { super.process(); response = new SimpleOrderedMap<>(); - fillBucket(response, freq.q); + fillBucket(response, freq.q, null); } diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java b/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java index 3b13967b675..4e76e10342c 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java +++ b/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java @@ -336,7 +336,7 @@ class FacetRangeProcessor extends FacetProcessor { } Query rangeQ = sf.getType().getRangeQuery(null, sf, range.low == null ? null : calc.formatValue(range.low), range.high==null ? null : calc.formatValue(range.high), range.includeLower, range.includeUpper); - fillBucket(bucket, rangeQ); + fillBucket(bucket, rangeQ, null); return bucket; } diff --git a/solr/core/src/java/org/apache/solr/search/facet/UnInvertedField.java b/solr/core/src/java/org/apache/solr/search/facet/UnInvertedField.java index d4a738c1688..300b6d8a6dc 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/UnInvertedField.java +++ b/solr/core/src/java/org/apache/solr/search/facet/UnInvertedField.java @@ -394,7 +394,7 @@ public class UnInvertedField extends DocTermOrds { public void collectDocs(FacetFieldProcessorUIF processor) throws IOException { - if (processor.collectAcc==null && processor.missingAcc == null && processor.allBucketsAcc == null && processor.startTermIndex == 0 && processor.endTermIndex >= numTermsInField) { + if (processor.collectAcc==null && processor.allBucketsAcc == null && processor.startTermIndex == 0 && processor.endTermIndex >= numTermsInField) { getCounts(processor, processor.countAcc); return; }